1//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// Here we implement the Flight Data Recorder mode for XRay, where we use
12// compact structures to store records in memory as well as when writing out the
13// data to files.
14//
15//===----------------------------------------------------------------------===//
16#include "xray_fdr_logging.h"
17#include <cassert>
18#include <errno.h>
19#include <limits>
20#include <memory>
21#include <pthread.h>
22#include <sys/time.h>
23#include <time.h>
24#include <unistd.h>
25
26#include "sanitizer_common/sanitizer_allocator_internal.h"
27#include "sanitizer_common/sanitizer_atomic.h"
28#include "sanitizer_common/sanitizer_common.h"
29#include "xray/xray_interface.h"
30#include "xray/xray_records.h"
31#include "xray_allocator.h"
32#include "xray_buffer_queue.h"
33#include "xray_defs.h"
34#include "xray_fdr_controller.h"
35#include "xray_fdr_flags.h"
36#include "xray_fdr_log_writer.h"
37#include "xray_flags.h"
38#include "xray_recursion_guard.h"
39#include "xray_tsc.h"
40#include "xray_utils.h"
41
42namespace __xray {
43
44static atomic_sint32_t LoggingStatus = {
45    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
46
47namespace {
48
49// Group together thread-local-data in a struct, then hide it behind a function
50// call so that it can be initialized on first use instead of as a global. We
51// force the alignment to 64-bytes for x86 cache line alignment, as this
52// structure is used in the hot path of implementation.
53struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
54  BufferQueue::Buffer Buffer{};
55  BufferQueue *BQ = nullptr;
56
57  using LogWriterStorage =
58      typename std::aligned_storage<sizeof(FDRLogWriter),
59                                    alignof(FDRLogWriter)>::type;
60
61  LogWriterStorage LWStorage;
62  FDRLogWriter *Writer = nullptr;
63
64  using ControllerStorage =
65      typename std::aligned_storage<sizeof(FDRController<>),
66                                    alignof(FDRController<>)>::type;
67  ControllerStorage CStorage;
68  FDRController<> *Controller = nullptr;
69};
70
71} // namespace
72
73static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
74              "ThreadLocalData must be trivially destructible");
75
76// Use a global pthread key to identify thread-local data for logging.
77static pthread_key_t Key;
78
79// Global BufferQueue.
80static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
81static BufferQueue *BQ = nullptr;
82
83// Global thresholds for function durations.
84static atomic_uint64_t ThresholdTicks{0};
85
86// Global for ticks per second.
87static atomic_uint64_t TicksPerSec{0};
88
89static atomic_sint32_t LogFlushStatus = {
90    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
91
92// This function will initialize the thread-local data structure used by the FDR
93// logging implementation and return a reference to it. The implementation
94// details require a bit of care to maintain.
95//
96// First, some requirements on the implementation in general:
97//
98//   - XRay handlers should not call any memory allocation routines that may
99//     delegate to an instrumented implementation. This means functions like
100//     malloc() and free() should not be called while instrumenting.
101//
102//   - We would like to use some thread-local data initialized on first-use of
103//     the XRay instrumentation. These allow us to implement unsynchronized
104//     routines that access resources associated with the thread.
105//
106// The implementation here uses a few mechanisms that allow us to provide both
107// the requirements listed above. We do this by:
108//
109//   1. Using a thread-local aligned storage buffer for representing the
110//      ThreadLocalData struct. This data will be uninitialized memory by
111//      design.
112//
113//   2. Not requiring a thread exit handler/implementation, keeping the
114//      thread-local as purely a collection of references/data that do not
115//      require cleanup.
116//
117// We're doing this to avoid using a `thread_local` object that has a
118// non-trivial destructor, because the C++ runtime might call std::malloc(...)
119// to register calls to destructors. Deadlocks may arise when, for example, an
120// externally provided malloc implementation is XRay instrumented, and
121// initializing the thread-locals involves calling into malloc. A malloc
122// implementation that does global synchronization might be holding a lock for a
123// critical section, calling a function that might be XRay instrumented (and
124// thus in turn calling into malloc by virtue of registration of the
125// thread_local's destructor).
126#if XRAY_HAS_TLS_ALIGNAS
127static_assert(alignof(ThreadLocalData) >= 64,
128              "ThreadLocalData must be cache line aligned.");
129#endif
130static ThreadLocalData &getThreadLocalData() {
131  thread_local typename std::aligned_storage<
132      sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
133
134  if (pthread_getspecific(Key) == NULL) {
135    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
136    pthread_setspecific(Key, &TLDStorage);
137  }
138
139  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
140}
141
142static XRayFileHeader &fdrCommonHeaderInfo() {
143  static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
144  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
145  static bool TSCSupported = true;
146  static uint64_t CycleFrequency = NanosecondsPerSecond;
147  pthread_once(
148      &OnceInit, +[] {
149        XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
150        // Version 2 of the log writes the extents of the buffer, instead of
151        // relying on an end-of-buffer record.
152        // Version 3 includes PID metadata record.
153        // Version 4 includes CPU data in the custom event records.
154        // Version 5 uses relative deltas for custom and typed event records,
155        // and removes the CPU data in custom event records (similar to how
156        // function records use deltas instead of full TSCs and rely on other
157        // metadata records for TSC wraparound and CPU migration).
158        H.Version = 5;
159        H.Type = FileTypes::FDR_LOG;
160
161        // Test for required CPU features and cache the cycle frequency
162        TSCSupported = probeRequiredCPUFeatures();
163        if (TSCSupported)
164          CycleFrequency = getTSCFrequency();
165        H.CycleFrequency = CycleFrequency;
166
167        // FIXME: Actually check whether we have 'constant_tsc' and
168        // 'nonstop_tsc' before setting the values in the header.
169        H.ConstantTSC = 1;
170        H.NonstopTSC = 1;
171      });
172  return reinterpret_cast<XRayFileHeader &>(HStorage);
173}
174
175// This is the iterator implementation, which knows how to handle FDR-mode
176// specific buffers. This is used as an implementation of the iterator function
177// needed by __xray_set_buffer_iterator(...). It maintains a global state of the
178// buffer iteration for the currently installed FDR mode buffers. In particular:
179//
180//   - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
181//     then the iterator returns the header information.
182//   - If the argument represents the header information ({address of header
183//     info, size of the header info}) then it returns the first FDR buffer's
184//     address and extents.
185//   - It will keep returning the next buffer and extents as there are more
186//     buffers to process. When the input represents the last buffer, it will
187//     return the initial state to signal completion ({nullptr, 0}).
188//
189// See xray/xray_log_interface.h for more details on the requirements for the
190// implementations of __xray_set_buffer_iterator(...) and
191// __xray_log_process_buffers(...).
192XRayBuffer fdrIterator(const XRayBuffer B) {
193  DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
194  DCHECK(BQ->finalizing());
195
196  if (BQ == nullptr || !BQ->finalizing()) {
197    if (Verbosity())
198      Report(
199          "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
200    return {nullptr, 0};
201  }
202
203  // We use a global scratch-pad for the header information, which only gets
204  // initialized the first time this function is called. We'll update one part
205  // of this information with some relevant data (in particular the number of
206  // buffers to expect).
207  static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
208  static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
209  pthread_once(
210      &HeaderOnce, +[] {
211        reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
212            fdrCommonHeaderInfo();
213      });
214
215  // We use a convenience alias for code referring to Header from here on out.
216  auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
217  if (B.Data == nullptr && B.Size == 0) {
218    Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
219    return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
220  }
221
222  static BufferQueue::const_iterator It{};
223  static BufferQueue::const_iterator End{};
224  static uint8_t *CurrentBuffer{nullptr};
225  static size_t SerializedBufferSize = 0;
226  if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
227    // From this point on, we provide raw access to the raw buffer we're getting
228    // from the BufferQueue. We're relying on the iterators from the current
229    // Buffer queue.
230    It = BQ->cbegin();
231    End = BQ->cend();
232  }
233
234  if (CurrentBuffer != nullptr) {
235    deallocateBuffer(CurrentBuffer, SerializedBufferSize);
236    CurrentBuffer = nullptr;
237  }
238
239  if (It == End)
240    return {nullptr, 0};
241
242  // Set up the current buffer to contain the extents like we would when writing
243  // out to disk. The difference here would be that we still write "empty"
244  // buffers, or at least go through the iterators faithfully to let the
245  // handlers see the empty buffers in the queue.
246  //
247  // We need this atomic fence here to ensure that writes happening to the
248  // buffer have been committed before we load the extents atomically. Because
249  // the buffer is not explicitly synchronised across threads, we rely on the
250  // fence ordering to ensure that writes we expect to have been completed
251  // before the fence are fully committed before we read the extents.
252  atomic_thread_fence(memory_order_acquire);
253  auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
254  SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
255  CurrentBuffer = allocateBuffer(SerializedBufferSize);
256  if (CurrentBuffer == nullptr)
257    return {nullptr, 0};
258
259  // Write out the extents as a Metadata Record into the CurrentBuffer.
260  MetadataRecord ExtentsRecord;
261  ExtentsRecord.Type = uint8_t(RecordType::Metadata);
262  ExtentsRecord.RecordKind =
263      uint8_t(MetadataRecord::RecordKinds::BufferExtents);
264  internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
265  auto AfterExtents =
266      static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
267                                          sizeof(MetadataRecord))) +
268      sizeof(MetadataRecord);
269  internal_memcpy(AfterExtents, It->Data, BufferSize);
270
271  XRayBuffer Result;
272  Result.Data = CurrentBuffer;
273  Result.Size = SerializedBufferSize;
274  ++It;
275  return Result;
276}
277
278// Must finalize before flushing.
279XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
280  if (atomic_load(&LoggingStatus, memory_order_acquire) !=
281      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
282    if (Verbosity())
283      Report("Not flushing log, implementation is not finalized.\n");
284    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
285  }
286
287  if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
288                      memory_order_release) ==
289      XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
290    if (Verbosity())
291      Report("Not flushing log, implementation is still flushing.\n");
292    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
293  }
294
295  if (BQ == nullptr) {
296    if (Verbosity())
297      Report("Cannot flush when global buffer queue is null.\n");
298    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
299  }
300
301  // We wait a number of milliseconds to allow threads to see that we've
302  // finalised before attempting to flush the log.
303  SleepForMillis(fdrFlags()->grace_period_ms);
304
305  // At this point, we're going to uninstall the iterator implementation, before
306  // we decide to do anything further with the global buffer queue.
307  __xray_log_remove_buffer_iterator();
308
309  // Once flushed, we should set the global status of the logging implementation
310  // to "uninitialized" to allow for FDR-logging multiple runs.
311  auto ResetToUnitialized = at_scope_exit([] {
312    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
313                 memory_order_release);
314  });
315
316  auto CleanupBuffers = at_scope_exit([] {
317    auto &TLD = getThreadLocalData();
318    if (TLD.Controller != nullptr)
319      TLD.Controller->flush();
320  });
321
322  if (fdrFlags()->no_file_flush) {
323    if (Verbosity())
324      Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
325
326    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
327                 memory_order_release);
328    return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
329  }
330
331  // We write out the file in the following format:
332  //
333  //   1) We write down the XRay file header with version 1, type FDR_LOG.
334  //   2) Then we use the 'apply' member of the BufferQueue that's live, to
335  //      ensure that at this point in time we write down the buffers that have
336  //      been released (and marked "used") -- we dump the full buffer for now
337  //      (fixed-sized) and let the tools reading the buffers deal with the data
338  //      afterwards.
339  //
340  LogWriter *LW = LogWriter::Open();
341  if (LW == nullptr) {
342    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
343    atomic_store(&LogFlushStatus, Result, memory_order_release);
344    return Result;
345  }
346
347  XRayFileHeader Header = fdrCommonHeaderInfo();
348  Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
349  LW->WriteAll(reinterpret_cast<char *>(&Header),
350               reinterpret_cast<char *>(&Header) + sizeof(Header));
351
352  // Release the current thread's buffer before we attempt to write out all the
353  // buffers. This ensures that in case we had only a single thread going, that
354  // we are able to capture the data nonetheless.
355  auto &TLD = getThreadLocalData();
356  if (TLD.Controller != nullptr)
357    TLD.Controller->flush();
358
359  BQ->apply([&](const BufferQueue::Buffer &B) {
360    // Starting at version 2 of the FDR logging implementation, we only write
361    // the records identified by the extents of the buffer. We use the Extents
362    // from the Buffer and write that out as the first record in the buffer.  We
363    // still use a Metadata record, but fill in the extents instead for the
364    // data.
365    MetadataRecord ExtentsRecord;
366    auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
367    DCHECK(BufferExtents <= B.Size);
368    ExtentsRecord.Type = uint8_t(RecordType::Metadata);
369    ExtentsRecord.RecordKind =
370        uint8_t(MetadataRecord::RecordKinds::BufferExtents);
371    internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
372    if (BufferExtents > 0) {
373      LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
374                   reinterpret_cast<char *>(&ExtentsRecord) +
375                       sizeof(MetadataRecord));
376      LW->WriteAll(reinterpret_cast<char *>(B.Data),
377                   reinterpret_cast<char *>(B.Data) + BufferExtents);
378    }
379  });
380
381  atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
382               memory_order_release);
383  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
384}
385
386XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
387  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
388  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
389                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
390                                      memory_order_release)) {
391    if (Verbosity())
392      Report("Cannot finalize log, implementation not initialized.\n");
393    return static_cast<XRayLogInitStatus>(CurrentStatus);
394  }
395
396  // Do special things to make the log finalize itself, and not allow any more
397  // operations to be performed until re-initialized.
398  if (BQ == nullptr) {
399    if (Verbosity())
400      Report("Attempting to finalize an uninitialized global buffer!\n");
401  } else {
402    BQ->finalize();
403  }
404
405  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
406               memory_order_release);
407  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
408}
409
410struct TSCAndCPU {
411  uint64_t TSC = 0;
412  unsigned char CPU = 0;
413};
414
415static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
416  // We want to get the TSC as early as possible, so that we can check whether
417  // we've seen this CPU before. We also do it before we load anything else,
418  // to allow for forward progress with the scheduling.
419  TSCAndCPU Result;
420
421  // Test once for required CPU features
422  static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
423  static bool TSCSupported = true;
424  pthread_once(
425      &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
426
427  if (TSCSupported) {
428    Result.TSC = __xray::readTSC(Result.CPU);
429  } else {
430    // FIXME: This code needs refactoring as it appears in multiple locations
431    timespec TS;
432    int result = clock_gettime(CLOCK_REALTIME, &TS);
433    if (result != 0) {
434      Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
435      TS = {0, 0};
436    }
437    Result.CPU = 0;
438    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
439  }
440  return Result;
441}
442
443thread_local atomic_uint8_t Running{0};
444
445static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
446  // Check if we're finalizing, before proceeding.
447  {
448    auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
449    if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
450        Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
451      if (TLD.Controller != nullptr) {
452        TLD.Controller->flush();
453        TLD.Controller = nullptr;
454      }
455      return false;
456    }
457  }
458
459  if (UNLIKELY(TLD.Controller == nullptr)) {
460    // Set up the TLD buffer queue.
461    if (UNLIKELY(BQ == nullptr))
462      return false;
463    TLD.BQ = BQ;
464
465    // Check that we have a valid buffer.
466    if (TLD.Buffer.Generation != BQ->generation() &&
467        TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
468      return false;
469
470    // Set up a buffer, before setting up the log writer. Bail out on failure.
471    if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
472      return false;
473
474    // Set up the Log Writer for this thread.
475    if (UNLIKELY(TLD.Writer == nullptr)) {
476      auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
477      new (LWStorage) FDRLogWriter(TLD.Buffer);
478      TLD.Writer = LWStorage;
479    } else {
480      TLD.Writer->resetRecord();
481    }
482
483    auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
484    new (CStorage)
485        FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
486                        atomic_load_relaxed(&ThresholdTicks));
487    TLD.Controller = CStorage;
488  }
489
490  DCHECK_NE(TLD.Controller, nullptr);
491  return true;
492}
493
494void fdrLoggingHandleArg0(int32_t FuncId,
495                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
496  auto TC = getTimestamp();
497  auto &TSC = TC.TSC;
498  auto &CPU = TC.CPU;
499  RecursionGuard Guard{Running};
500  if (!Guard)
501    return;
502
503  auto &TLD = getThreadLocalData();
504  if (!setupTLD(TLD))
505    return;
506
507  switch (Entry) {
508  case XRayEntryType::ENTRY:
509  case XRayEntryType::LOG_ARGS_ENTRY:
510    TLD.Controller->functionEnter(FuncId, TSC, CPU);
511    return;
512  case XRayEntryType::EXIT:
513    TLD.Controller->functionExit(FuncId, TSC, CPU);
514    return;
515  case XRayEntryType::TAIL:
516    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
517    return;
518  case XRayEntryType::CUSTOM_EVENT:
519  case XRayEntryType::TYPED_EVENT:
520    break;
521  }
522}
523
524void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
525                          uint64_t Arg) XRAY_NEVER_INSTRUMENT {
526  auto TC = getTimestamp();
527  auto &TSC = TC.TSC;
528  auto &CPU = TC.CPU;
529  RecursionGuard Guard{Running};
530  if (!Guard)
531    return;
532
533  auto &TLD = getThreadLocalData();
534  if (!setupTLD(TLD))
535    return;
536
537  switch (Entry) {
538  case XRayEntryType::ENTRY:
539  case XRayEntryType::LOG_ARGS_ENTRY:
540    TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
541    return;
542  case XRayEntryType::EXIT:
543    TLD.Controller->functionExit(FuncId, TSC, CPU);
544    return;
545  case XRayEntryType::TAIL:
546    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
547    return;
548  case XRayEntryType::CUSTOM_EVENT:
549  case XRayEntryType::TYPED_EVENT:
550    break;
551  }
552}
553
554void fdrLoggingHandleCustomEvent(void *Event,
555                                 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
556  auto TC = getTimestamp();
557  auto &TSC = TC.TSC;
558  auto &CPU = TC.CPU;
559  RecursionGuard Guard{Running};
560  if (!Guard)
561    return;
562
563  // Complain when we ever get at least one custom event that's larger than what
564  // we can possibly support.
565  if (EventSize >
566      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
567    static pthread_once_t Once = PTHREAD_ONCE_INIT;
568    pthread_once(
569        &Once, +[] {
570          Report("Custom event size too large; truncating to %d.\n",
571                 std::numeric_limits<int32_t>::max());
572        });
573  }
574
575  auto &TLD = getThreadLocalData();
576  if (!setupTLD(TLD))
577    return;
578
579  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
580  TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
581}
582
583void fdrLoggingHandleTypedEvent(
584    uint16_t EventType, const void *Event,
585    std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
586  auto TC = getTimestamp();
587  auto &TSC = TC.TSC;
588  auto &CPU = TC.CPU;
589  RecursionGuard Guard{Running};
590  if (!Guard)
591    return;
592
593  // Complain when we ever get at least one typed event that's larger than what
594  // we can possibly support.
595  if (EventSize >
596      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
597    static pthread_once_t Once = PTHREAD_ONCE_INIT;
598    pthread_once(
599        &Once, +[] {
600          Report("Typed event size too large; truncating to %d.\n",
601                 std::numeric_limits<int32_t>::max());
602        });
603  }
604
605  auto &TLD = getThreadLocalData();
606  if (!setupTLD(TLD))
607    return;
608
609  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
610  TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize);
611}
612
613XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
614                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
615  if (Options == nullptr)
616    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
617
618  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
619  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
620                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
621                                      memory_order_release)) {
622    if (Verbosity())
623      Report("Cannot initialize already initialized implementation.\n");
624    return static_cast<XRayLogInitStatus>(CurrentStatus);
625  }
626
627  if (Verbosity())
628    Report("Initializing FDR mode with options: %s\n",
629           static_cast<const char *>(Options));
630
631  // TODO: Factor out the flags specific to the FDR mode implementation. For
632  // now, use the global/single definition of the flags, since the FDR mode
633  // flags are already defined there.
634  FlagParser FDRParser;
635  FDRFlags FDRFlags;
636  registerXRayFDRFlags(&FDRParser, &FDRFlags);
637  FDRFlags.setDefaults();
638
639  // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
640  // options until we migrate everyone to use the XRAY_FDR_OPTIONS
641  // compiler-provided options.
642  FDRParser.ParseString(useCompilerDefinedFlags());
643  FDRParser.ParseString(useCompilerDefinedFDRFlags());
644  auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
645  if (EnvOpts == nullptr)
646    EnvOpts = "";
647  FDRParser.ParseString(EnvOpts);
648
649  // FIXME: Remove this when we fully remove the deprecated flags.
650  if (internal_strlen(EnvOpts) == 0) {
651    FDRFlags.func_duration_threshold_us =
652        flags()->xray_fdr_log_func_duration_threshold_us;
653    FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
654  }
655
656  // The provided options should always override the compiler-provided and
657  // environment-variable defined options.
658  FDRParser.ParseString(static_cast<const char *>(Options));
659  *fdrFlags() = FDRFlags;
660  auto BufferSize = FDRFlags.buffer_size;
661  auto BufferMax = FDRFlags.buffer_max;
662
663  if (BQ == nullptr) {
664    bool Success = false;
665    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
666    new (BQ) BufferQueue(BufferSize, BufferMax, Success);
667    if (!Success) {
668      Report("BufferQueue init failed.\n");
669      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
670    }
671  } else {
672    if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
673      if (Verbosity())
674        Report("Failed to re-initialize global buffer queue. Init failed.\n");
675      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
676    }
677  }
678
679  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
680  pthread_once(
681      &OnceInit, +[] {
682        atomic_store(&TicksPerSec,
683                     probeRequiredCPUFeatures() ? getTSCFrequency()
684                                                : __xray::NanosecondsPerSecond,
685                     memory_order_release);
686        pthread_key_create(
687            &Key, +[](void *TLDPtr) {
688              if (TLDPtr == nullptr)
689                return;
690              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
691              if (TLD.BQ == nullptr)
692                return;
693              if (TLD.Buffer.Data == nullptr)
694                return;
695              auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
696              if (EC != BufferQueue::ErrorCode::Ok)
697                Report("At thread exit, failed to release buffer at %p; "
698                       "error=%s\n",
699                       TLD.Buffer.Data, BufferQueue::getErrorString(EC));
700            });
701      });
702
703  atomic_store(&ThresholdTicks,
704               atomic_load_relaxed(&TicksPerSec) *
705                   fdrFlags()->func_duration_threshold_us / 1000000,
706               memory_order_release);
707  // Arg1 handler should go in first to avoid concurrent code accidentally
708  // falling back to arg0 when it should have ran arg1.
709  __xray_set_handler_arg1(fdrLoggingHandleArg1);
710  // Install the actual handleArg0 handler after initialising the buffers.
711  __xray_set_handler(fdrLoggingHandleArg0);
712  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
713  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
714
715  // Install the buffer iterator implementation.
716  __xray_log_set_buffer_iterator(fdrIterator);
717
718  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
719               memory_order_release);
720
721  if (Verbosity())
722    Report("XRay FDR init successful.\n");
723  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
724}
725
726bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
727  XRayLogImpl Impl{
728      fdrLoggingInit,
729      fdrLoggingFinalize,
730      fdrLoggingHandleArg0,
731      fdrLoggingFlush,
732  };
733  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
734  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
735      Verbosity()) {
736    Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
737           RegistrationResult);
738    return false;
739  }
740
741  if (flags()->xray_fdr_log ||
742      !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
743    auto SelectResult = __xray_log_select_mode("xray-fdr");
744    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
745        Verbosity()) {
746      Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
747             SelectResult);
748      return false;
749    }
750  }
751  return true;
752}
753
754} // namespace __xray
755
756static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
757