//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_MAC
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;                        // Number of sections in this segment.
  const char *current_load_cmd_addr;  // Cursor into the section records.
  u32 lc_type;                        // LC_SEGMENT or LC_SEGMENT_64.
  uptr base_virt_addr;                // Base added to masked section addresses.
  uptr addr_mask;                     // Mask applied to raw section addresses.
};

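// Registers the address range of the next section of the current segment
// with |module| and advances the section cursor. Section is instantiated
// with `struct section` (LC_SEGMENT) or `struct section_64` (LC_SEGMENT_64).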
template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in mach-o/loader.h.
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which describe the parts of the file that
// are mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and file
// offset of the current memory segment.
// Because these fields are taken from the images as-is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
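// For a 64-bit image the layout is:
//   mach_header_64 | load_command | load_command | ...
// where each load command begins with {u32 cmd; u32 cmdsize;}, and an
// LC_SEGMENT_64 command is followed (within its own cmdsize) by the
// section_64 records of that segment.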

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = nullptr;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = nullptr;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually walking the memory map with
// vm_region_recurse_64() until we find a Mach header whose filetype is
// MH_DYLINKER, which identifies dyld. These recursion calls are expensive,
// but the first memory map generation occurs early in the process, when
// dyld is one of the only images loaded, so it is found after only a few
// iterations.
static mach_header *get_dyld_image_header() {
  unsigned depth = 1;
  vm_size_t size = 0;
  vm_address_t address = 0;
  kern_return_t err = KERN_SUCCESS;
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

  while (true) {
    struct vm_region_submap_info_64 info;
    err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                               (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
          hdr->filetype == MH_DYLINKER) {
        return hdr;
      }
    }
    address += size;
  }
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) dyld_hdr = get_dyld_image_header();

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and fills |segment| with the start and end addresses and file offset of the
// corresponding segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;
  // Unconditionally advance past this command; only segment load commands
  // are parsed below.
  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      // The section records immediately follow the segment command.
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

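// Maps a Mach-O (cputype, cpusubtype) pair to the corresponding ModuleArch
// value. The capability bits (CPU_SUBTYPE_MASK) are stripped from the
// subtype before comparison.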
ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

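// Returns a pointer to the load command that follows |lc| in the load
// command list.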
static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

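// Scans the load commands for LC_UUID and copies the image's 16-byte UUID
// into |uuid_output|.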
static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

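// A module is considered instrumented if it links against a sanitizer
// runtime library (libclang_rt.*).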
static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

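// Fills |segment| with the next segment of the memory map. Images are visited
// from the highest dyld image index down to 0, followed by dyld itself
// (kDyldImageIdx == -1); per-image state (magic, filetype, arch, UUID) is
// initialized when a new image is entered.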
bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load commands in this image describe segments.
    // Go on to the next image.
  }
  return false;
}

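// Rescans the memory map, merging consecutive segments that belong to the
// same module (same filename) into a single LoadedModule entry.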
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalScopedString module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), kMaxPathLength);
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_MAC