1#include "cpuid.h"
2#include "sanitizer_common/sanitizer_common.h"
3#if !SANITIZER_FUCHSIA
4#include "sanitizer_common/sanitizer_posix.h"
5#endif
6#include "xray_defs.h"
7#include "xray_interface_internal.h"
8
9#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
10#include <sys/types.h>
11#if SANITIZER_OPENBSD
12#include <sys/time.h>
13#include <machine/cpu.h>
14#endif
15#include <sys/sysctl.h>
16#elif SANITIZER_FUCHSIA
17#include <zircon/syscalls.h>
18#endif
19
20#include <atomic>
21#include <cstdint>
22#include <errno.h>
23#include <fcntl.h>
24#include <iterator>
25#include <limits>
26#include <tuple>
27#include <unistd.h>
28
29namespace __xray {
30
31#if SANITIZER_LINUX
32static std::pair<ssize_t, bool>
33retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
34  auto BytesToRead = std::distance(Begin, End);
35  ssize_t BytesRead;
36  ssize_t TotalBytesRead = 0;
37  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
38    if (BytesRead == -1) {
39      if (errno == EINTR)
40        continue;
41      Report("Read error; errno = %d\n", errno);
42      return std::make_pair(TotalBytesRead, false);
43    }
44
45    TotalBytesRead += BytesRead;
46    BytesToRead -= BytesRead;
47    Begin += BytesRead;
48  }
49  return std::make_pair(TotalBytesRead, true);
50}
51
52static bool readValueFromFile(const char *Filename,
53                              long long *Value) XRAY_NEVER_INSTRUMENT {
54  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
55  if (Fd == -1)
56    return false;
57  static constexpr size_t BufSize = 256;
58  char Line[BufSize] = {};
59  ssize_t BytesRead;
60  bool Success;
61  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
62  close(Fd);
63  if (!Success)
64    return false;
65  const char *End = nullptr;
66  long long Tmp = internal_simple_strtoll(Line, &End, 10);
67  bool Result = false;
68  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
69    *Value = Tmp;
70    Result = true;
71  }
72  return Result;
73}
74
75uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
76  long long TSCFrequency = -1;
77  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
78                        &TSCFrequency)) {
79    TSCFrequency *= 1000;
80  } else if (readValueFromFile(
81                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
82                 &TSCFrequency)) {
83    TSCFrequency *= 1000;
84  } else {
85    Report("Unable to determine CPU frequency for TSC accounting.\n");
86  }
87  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
88}
89#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
    // Query the kernel for the TSC frequency via sysctl; on failure return 0
    // so callers can fall back to the emulated TSC. Note the if-condition
    // below is assembled from the preprocessor branch matching the platform.
    long long TSCFrequency = -1;
    size_t tscfreqsz = sizeof(TSCFrequency);
#if SANITIZER_OPENBSD
    // OpenBSD exposes the frequency through a numeric machdep MIB pair.
    int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
    if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
#elif SANITIZER_MAC
    // Darwin uses a dotted sysctl node name.
    if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
                              &tscfreqsz, NULL, 0) != -1) {

#else
    // FreeBSD/NetBSD spell the node with an underscore.
    if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
                              NULL, 0) != -1) {
#endif
        return static_cast<uint64_t>(TSCFrequency);
    } else {
      Report("Unable to determine CPU frequency for TSC accounting.\n");
    }

    return 0;
}
111#elif !SANITIZER_FUCHSIA
// Fallback for platforms with no known way to query the TSC frequency;
// returning 0 tells callers (e.g. probeRequiredCPUFeatures) to treat the
// frequency as undeterminable and use the emulated TSC instead.
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
    /* Not supported */
    return 0;
}
116#endif
117
// x86 instruction encodings written into sleds. Two-byte sequences are held
// in a uint16_t so a single 2-byte store emits both bytes; on x86-64
// (little-endian) 0xba41 stores as 41 ba, i.e. the "mov r10d, imm32" prefix.
static constexpr uint8_t CallOpCode = 0xe8;   // e8: call rel32
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: mov r10d, imm32
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9 (short)
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20 (short)
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15 (short)
static constexpr uint8_t JmpOpCode = 0xe9;    // e9: jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;    // c3: ret
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: nopw (2-byte nop)

// Relative offsets patched into call/jmp rel32 must fit a signed 32 bits.
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
129
130bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
131                        const XRaySledEntry &Sled,
132                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
133  // Here we do the dance of replacing the following sled:
134  //
135  // xray_sled_n:
136  //   jmp +9
137  //   <9 byte nop>
138  //
139  // With the following:
140  //
141  //   mov r10d, <function id>
142  //   call <relative 32bit offset to entry trampoline>
143  //
144  // We need to do this in the following order:
145  //
146  // 1. Put the function id first, 2 bytes from the start of the sled (just
147  // after the 2-byte jmp instruction).
148  // 2. Put the call opcode 6 bytes from the start of the sled.
149  // 3. Put the relative offset 7 bytes from the start of the sled.
150  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
151  // opcode and first operand.
152  //
153  // Prerequisite is to compute the relative offset to the trampoline's address.
154  const uint64_t Address = Sled.address();
155  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
156                             (static_cast<int64_t>(Address) + 11);
157  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
158    Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline,
159           reinterpret_cast<void *>(Address));
160    return false;
161  }
162  if (Enable) {
163    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
164    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
165    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
166    std::atomic_store_explicit(
167        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
168        std::memory_order_release);
169  } else {
170    std::atomic_store_explicit(
171        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
172        std::memory_order_release);
173    // FIXME: Write out the nops still?
174  }
175  return true;
176}
177
178bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
179                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
180  // Here we do the dance of replacing the following sled:
181  //
182  // xray_sled_n:
183  //   ret
184  //   <10 byte nop>
185  //
186  // With the following:
187  //
188  //   mov r10d, <function id>
189  //   jmp <relative 32bit offset to exit trampoline>
190  //
191  // 1. Put the function id first, 2 bytes from the start of the sled (just
192  // after the 1-byte ret instruction).
193  // 2. Put the jmp opcode 6 bytes from the start of the sled.
194  // 3. Put the relative offset 7 bytes from the start of the sled.
195  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
196  // opcode and first operand.
197  //
198  // Prerequisite is to compute the relative offset fo the
199  // __xray_FunctionExit function's address.
200  const uint64_t Address = Sled.address();
201  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
202                             (static_cast<int64_t>(Address) + 11);
203  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
204    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
205           __xray_FunctionExit, reinterpret_cast<void *>(Address));
206    return false;
207  }
208  if (Enable) {
209    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
210    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
211    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
212    std::atomic_store_explicit(
213        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
214        std::memory_order_release);
215  } else {
216    std::atomic_store_explicit(
217        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
218        std::memory_order_release);
219    // FIXME: Write out the nops still?
220  }
221  return true;
222}
223
224bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
225                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
226  // Here we do the dance of replacing the tail call sled with a similar
227  // sequence as the entry sled, but calls the tail exit sled instead.
228  const uint64_t Address = Sled.address();
229  int64_t TrampolineOffset =
230      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
231      (static_cast<int64_t>(Address) + 11);
232  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
233    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
234           __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
235    return false;
236  }
237  if (Enable) {
238    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
239    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
240    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
241    std::atomic_store_explicit(
242        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
243        std::memory_order_release);
244  } else {
245    std::atomic_store_explicit(
246        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
247        std::memory_order_release);
248    // FIXME: Write out the nops still?
249  }
250  return true;
251}
252
253bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
254                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
255  // Here we do the dance of replacing the following sled:
256  //
257  // In Version 0:
258  //
259  // xray_sled_n:
260  //   jmp +20          // 2 bytes
261  //   ...
262  //
263  // With the following:
264  //
265  //   nopw             // 2 bytes*
266  //   ...
267  //
268  //
269  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
270  //
271  // ---
272  //
273  // In Version 1 or 2:
274  //
275  //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
276  //   to a jmp, use 15 bytes instead.
277  //
278  const uint64_t Address = Sled.address();
279  if (Enable) {
280    std::atomic_store_explicit(
281        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
282        std::memory_order_release);
283  } else {
284    switch (Sled.Version) {
285    case 1:
286    case 2:
287      std::atomic_store_explicit(
288          reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
289          std::memory_order_release);
290      break;
291    case 0:
292    default:
293      std::atomic_store_explicit(
294          reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
295          std::memory_order_release);
296      break;
297    }
298    }
299  return false;
300}
301
302bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
303                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
304  // Here we do the dance of replacing the following sled:
305  //
306  // xray_sled_n:
307  //   jmp +20          // 2 byte instruction
308  //   ...
309  //
310  // With the following:
311  //
312  //   nopw             // 2 bytes
313  //   ...
314  //
315  //
316  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
317  // The 20 byte sled stashes three argument registers, calls the trampoline,
318  // unstashes the registers and returns. If the arguments are already in
319  // the correct registers, the stashing and unstashing become equivalently
320  // sized nops.
321  const uint64_t Address = Sled.address();
322  if (Enable) {
323    std::atomic_store_explicit(
324        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
325        std::memory_order_release);
326  } else {
327    std::atomic_store_explicit(
328        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
329        std::memory_order_release);
330  }
331  return false;
332}
333
334#if !SANITIZER_FUCHSIA
335// We determine whether the CPU we're running on has the correct features we
336// need. In x86_64 this will be rdtscp support.
337bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
338  unsigned int EAX, EBX, ECX, EDX;
339
340  // We check whether rdtscp support is enabled. According to the x86_64 manual,
341  // level should be set at 0x80000001, and we should have a look at bit 27 in
342  // EDX. That's 0x8000000 (or 1u << 27).
343  __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
344    : "0"(0x80000001));
345  if (!(EDX & (1u << 27))) {
346    Report("Missing rdtscp support.\n");
347    return false;
348  }
349  // Also check whether we can determine the CPU frequency, since if we cannot,
350  // we should use the emulated TSC instead.
351  if (!getTSCFrequency()) {
352    Report("Unable to determine CPU frequency.\n");
353    return false;
354  }
355  return true;
356}
357#endif
358
359} // namespace __xray
360