compact-unwind-dumper.c revision 360784
1#include <fcntl.h>
2#include <inttypes.h>
3#include <mach-o/compact_unwind_encoding.h>
4#include <mach-o/loader.h>
5#include <mach-o/nlist.h>
6#include <mach/machine.h>
7#include <stdbool.h>
8#include <stdint.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/errno.h>
13#include <sys/mman.h>
14#include <sys/stat.h>
15#include <sys/types.h>
16
// Bit layout of a 32-bit compact unwind encoding for arm64.
enum {
  // Bits 24-27 hold the unwind mode.
  UNWIND_ARM64_MODE_MASK = 0xF << 24,
  UNWIND_ARM64_MODE_FRAMELESS = 0x2 << 24,
  UNWIND_ARM64_MODE_DWARF = 0x3 << 24,
  UNWIND_ARM64_MODE_FRAME = 0x4 << 24,

  // Frame mode: one flag bit per saved general-purpose register pair ...
  UNWIND_ARM64_FRAME_X19_X20_PAIR = 1 << 0,
  UNWIND_ARM64_FRAME_X21_X22_PAIR = 1 << 1,
  UNWIND_ARM64_FRAME_X23_X24_PAIR = 1 << 2,
  UNWIND_ARM64_FRAME_X25_X26_PAIR = 1 << 3,
  UNWIND_ARM64_FRAME_X27_X28_PAIR = 1 << 4,
  // ... and one per saved d-register pair.
  UNWIND_ARM64_FRAME_D8_D9_PAIR = 1 << 8,
  UNWIND_ARM64_FRAME_D10_D11_PAIR = 1 << 9,
  UNWIND_ARM64_FRAME_D12_D13_PAIR = 1 << 10,
  UNWIND_ARM64_FRAME_D14_D15_PAIR = 1 << 11,

  // Frameless mode: stack size field in bits 12-23.
  UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0xFFF << 12,
  // DWARF mode: section offset field in bits 0-23.
  UNWIND_ARM64_DWARF_SECTION_OFFSET = 0xFFFFFF,
};
36
// Bit layout of a 32-bit compact unwind encoding for 32-bit arm.
enum {
  // Bits 24-27 hold the unwind mode.
  UNWIND_ARM_MODE_MASK = 0xF << 24,
  UNWIND_ARM_MODE_FRAME = 0x1 << 24,
  UNWIND_ARM_MODE_FRAME_D = 0x2 << 24,
  UNWIND_ARM_MODE_DWARF = 0x4 << 24,

  // Two-bit stack adjust field in bits 22-23.
  UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x3 << 22,

  // Flag bits for the registers of the first push.
  UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 1 << 0,
  UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 1 << 1,
  UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 1 << 2,

  // Flag bits for the registers of the second push.
  UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 1 << 3,
  UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 1 << 4,
  UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 1 << 5,
  UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 1 << 6,
  UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 1 << 7,

  // Three-bit d-register count field in bits 8-10.
  UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x7 << 8,

  // DWARF mode: section offset field in bits 0-23.
  UNWIND_ARM_DWARF_SECTION_OFFSET = 0xFFFFFF,
};
59
// Extract the bit-field selected by `mask` from `value`, shifted down so the
// field's lowest bit lands at bit 0.  `mask` must be a non-zero, contiguous
// run of fewer than 32 bits: the field width is taken from its population
// count and the shift from its trailing zero count.
//
// `value` is parenthesized so compound expressions (e.g. `a | b`) are shifted
// as a whole, and the field mask is built with unsigned arithmetic so wide
// fields do not overflow a signed int.
#define EXTRACT_BITS(value, mask)                                              \
  (((value) >> __builtin_ctz(mask)) &                                          \
   ((1u << __builtin_popcount(mask)) - 1u))
62
63// A quick sketch of a program which can parse the compact unwind info
64// used on Darwin systems for exception handling.  The output of
65// unwinddump will be more authoritative/reliable but this program
66// can dump at least the UNWIND_X86_64_MODE_RBP_FRAME format entries
67// correctly.
68
// One entry of the dumper's symbol table: an address and the name to print
// for it.
struct symbol {
  uint64_t file_address;
  const char *name;
};

// Comparator for qsort()/bsearch() over arrays of struct symbol, ordering
// entries by ascending file_address.
//
// The two 64-bit addresses are compared directly.  Returning their
// difference truncated to int reports the wrong sign (or equality) whenever
// the addresses differ by something that does not fit in an int.
//
// Returns a negative value, zero, or a positive value when `a` sorts before,
// equal to, or after `b`.
int symbol_compare(const void *a, const void *b) {
  const struct symbol *lhs = (const struct symbol *)a;
  const struct symbol *rhs = (const struct symbol *)b;

  if (lhs->file_address < rhs->file_address)
    return -1;
  if (lhs->file_address > rhs->file_address)
    return 1;
  return 0;
}
78
79struct baton {
80  cpu_type_t cputype;
81
82  uint8_t *mach_header_start;    // pointer into this program's address space
83  uint8_t *compact_unwind_start; // pointer into this program's address space
84
85  int addr_size; // 4 or 8 bytes, the size of addresses in this file
86
87  uint64_t text_segment_vmaddr; // __TEXT segment vmaddr
88  uint64_t text_segment_file_offset;
89
90  uint64_t text_section_vmaddr; // __TEXT,__text section vmaddr
91  uint64_t text_section_file_offset;
92
93  uint64_t eh_section_file_address; // the file address of the __TEXT,__eh_frame
94                                    // section
95
96  uint8_t
97      *lsda_array_start; // for the currently-being-processed first-level index
98  uint8_t
99      *lsda_array_end; // the lsda_array_start for the NEXT first-level index
100
101  struct symbol *symbols;
102  int symbols_count;
103
104  uint64_t *function_start_addresses;
105  int function_start_addresses_count;
106
107  int current_index_table_number;
108
109  struct unwind_info_section_header unwind_header;
110  struct unwind_info_section_header_index_entry first_level_index_entry;
111  struct unwind_info_compressed_second_level_page_header
112      compressed_second_level_page_header;
113  struct unwind_info_regular_second_level_page_header
114      regular_second_level_page_header;
115};
116
// Decode one unsigned LEB128 value starting at *offset.
//
// Each byte contributes its low 7 bits, least significant group first; a
// byte with the high bit clear ends the value.  On return *offset points
// just past the last byte consumed.  The caller must ensure the encoded
// value is terminated within readable memory; no bounds are checked here.
//
// Returns the decoded value.  Groups that would land at or beyond bit 64
// are still consumed but do not contribute to the result.
uint64_t read_leb128(uint8_t **offset) {
  uint64_t result = 0;
  unsigned shift = 0;
  while (1) {
    uint8_t byte = **offset;
    *offset = *offset + 1;
    // Widen to 64 bits before shifting: shifting the int-typed operand
    // overflows once the value needs more than 31 bits.
    if (shift < 64)
      result |= (uint64_t)(byte & 0x7f) << shift;
    if ((byte & 0x80) == 0)
      break;
    shift += 7;
  }

  return result;
}
131
132// step through the load commands in a thin mach-o binary,
133// find the cputype and the start of the __TEXT,__unwind_info
134// section, return a pointer to that section or NULL if not found.
135
136static void scan_macho_load_commands(struct baton *baton) {
137  struct symtab_command symtab_cmd;
138  uint64_t linkedit_segment_vmaddr;
139  uint64_t linkedit_segment_file_offset;
140
141  baton->compact_unwind_start = 0;
142
143  uint32_t *magic = (uint32_t *)baton->mach_header_start;
144
145  if (*magic != MH_MAGIC && *magic != MH_MAGIC_64) {
146    printf("Unexpected magic number 0x%x in header, exiting.", *magic);
147    exit(1);
148  }
149
150  bool is_64bit = false;
151  if (*magic == MH_MAGIC_64)
152    is_64bit = true;
153
154  uint8_t *offset = baton->mach_header_start;
155
156  struct mach_header mh;
157  memcpy(&mh, offset, sizeof(struct mach_header));
158  if (is_64bit)
159    offset += sizeof(struct mach_header_64);
160  else
161    offset += sizeof(struct mach_header);
162
163  if (is_64bit)
164    baton->addr_size = 8;
165  else
166    baton->addr_size = 4;
167
168  baton->cputype = mh.cputype;
169
170  uint8_t *start_of_load_commands = offset;
171
172  uint32_t cur_cmd = 0;
173  while (cur_cmd < mh.ncmds &&
174         (offset - start_of_load_commands) < mh.sizeofcmds) {
175    struct load_command lc;
176    uint32_t *lc_cmd = (uint32_t *)offset;
177    uint32_t *lc_cmdsize = (uint32_t *)offset + 1;
178    uint8_t *start_of_this_load_cmd = offset;
179
180    if (*lc_cmd == LC_SEGMENT || *lc_cmd == LC_SEGMENT_64) {
181      char segment_name[17];
182      segment_name[0] = '\0';
183      uint32_t nsects = 0;
184      uint64_t segment_offset = 0;
185      uint64_t segment_vmaddr = 0;
186
187      if (*lc_cmd == LC_SEGMENT_64) {
188        struct segment_command_64 seg;
189        memcpy(&seg, offset, sizeof(struct segment_command_64));
190        memcpy(&segment_name, &seg.segname, 16);
191        segment_name[16] = '\0';
192        nsects = seg.nsects;
193        segment_offset = seg.fileoff;
194        segment_vmaddr = seg.vmaddr;
195        offset += sizeof(struct segment_command_64);
196        if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
197          printf("Segment '%s' is encrypted.\n", segment_name);
198        }
199      }
200
201      if (*lc_cmd == LC_SEGMENT) {
202        struct segment_command seg;
203        memcpy(&seg, offset, sizeof(struct segment_command));
204        memcpy(&segment_name, &seg.segname, 16);
205        segment_name[16] = '\0';
206        nsects = seg.nsects;
207        segment_offset = seg.fileoff;
208        segment_vmaddr = seg.vmaddr;
209        offset += sizeof(struct segment_command);
210        if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
211          printf("Segment '%s' is encrypted.\n", segment_name);
212        }
213      }
214
215      if (nsects != 0 && strcmp(segment_name, "__TEXT") == 0) {
216        baton->text_segment_vmaddr = segment_vmaddr;
217        baton->text_segment_file_offset = segment_offset;
218
219        uint32_t current_sect = 0;
220        while (current_sect < nsects &&
221               (offset - start_of_this_load_cmd) < *lc_cmdsize) {
222          char sect_name[17];
223          memcpy(&sect_name, offset, 16);
224          sect_name[16] = '\0';
225          if (strcmp(sect_name, "__unwind_info") == 0) {
226            if (is_64bit) {
227              struct section_64 sect;
228              memset(&sect, 0, sizeof(struct section_64));
229              memcpy(&sect, offset, sizeof(struct section_64));
230              baton->compact_unwind_start =
231                  baton->mach_header_start + sect.offset;
232            } else {
233              struct section sect;
234              memset(&sect, 0, sizeof(struct section));
235              memcpy(&sect, offset, sizeof(struct section));
236              baton->compact_unwind_start =
237                  baton->mach_header_start + sect.offset;
238            }
239          }
240          if (strcmp(sect_name, "__eh_frame") == 0) {
241            if (is_64bit) {
242              struct section_64 sect;
243              memset(&sect, 0, sizeof(struct section_64));
244              memcpy(&sect, offset, sizeof(struct section_64));
245              baton->eh_section_file_address = sect.addr;
246            } else {
247              struct section sect;
248              memset(&sect, 0, sizeof(struct section));
249              memcpy(&sect, offset, sizeof(struct section));
250              baton->eh_section_file_address = sect.addr;
251            }
252          }
253          if (strcmp(sect_name, "__text") == 0) {
254            if (is_64bit) {
255              struct section_64 sect;
256              memset(&sect, 0, sizeof(struct section_64));
257              memcpy(&sect, offset, sizeof(struct section_64));
258              baton->text_section_vmaddr = sect.addr;
259              baton->text_section_file_offset = sect.offset;
260            } else {
261              struct section sect;
262              memset(&sect, 0, sizeof(struct section));
263              memcpy(&sect, offset, sizeof(struct section));
264              baton->text_section_vmaddr = sect.addr;
265            }
266          }
267          if (is_64bit) {
268            offset += sizeof(struct section_64);
269          } else {
270            offset += sizeof(struct section);
271          }
272        }
273      }
274
275      if (strcmp(segment_name, "__LINKEDIT") == 0) {
276        linkedit_segment_vmaddr = segment_vmaddr;
277        linkedit_segment_file_offset = segment_offset;
278      }
279    }
280
281    if (*lc_cmd == LC_SYMTAB) {
282      memcpy(&symtab_cmd, offset, sizeof(struct symtab_command));
283    }
284
285    if (*lc_cmd == LC_DYSYMTAB) {
286      struct dysymtab_command dysymtab_cmd;
287      memcpy(&dysymtab_cmd, offset, sizeof(struct dysymtab_command));
288
289      int nlist_size = 12;
290      if (is_64bit)
291        nlist_size = 16;
292
293      char *string_table =
294          (char *)(baton->mach_header_start + symtab_cmd.stroff);
295      uint8_t *local_syms = baton->mach_header_start + symtab_cmd.symoff +
296                            (dysymtab_cmd.ilocalsym * nlist_size);
297      int local_syms_count = dysymtab_cmd.nlocalsym;
298      uint8_t *exported_syms = baton->mach_header_start + symtab_cmd.symoff +
299                               (dysymtab_cmd.iextdefsym * nlist_size);
300      int exported_syms_count = dysymtab_cmd.nextdefsym;
301
302      // We're only going to create records for a small number of these symbols
303      // but to
304      // simplify the memory management I'll allocate enough space to store all
305      // of them.
306      baton->symbols = (struct symbol *)malloc(
307          sizeof(struct symbol) * (local_syms_count + exported_syms_count));
308      baton->symbols_count = 0;
309
310      for (int i = 0; i < local_syms_count; i++) {
311        struct nlist_64 nlist;
312        memset(&nlist, 0, sizeof(struct nlist_64));
313        if (is_64bit) {
314          memcpy(&nlist, local_syms + (i * nlist_size),
315                 sizeof(struct nlist_64));
316        } else {
317          struct nlist nlist_32;
318          memset(&nlist_32, 0, sizeof(struct nlist));
319          memcpy(&nlist_32, local_syms + (i * nlist_size),
320                 sizeof(struct nlist));
321          nlist.n_un.n_strx = nlist_32.n_un.n_strx;
322          nlist.n_type = nlist_32.n_type;
323          nlist.n_sect = nlist_32.n_sect;
324          nlist.n_desc = nlist_32.n_desc;
325          nlist.n_value = nlist_32.n_value;
326        }
327        if ((nlist.n_type & N_STAB) == 0 &&
328            ((nlist.n_type & N_EXT) == 1 ||
329             ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
330            nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
331          baton->symbols[baton->symbols_count].file_address = nlist.n_value;
332          if (baton->cputype == CPU_TYPE_ARM)
333            baton->symbols[baton->symbols_count].file_address =
334                baton->symbols[baton->symbols_count].file_address & ~1;
335          baton->symbols[baton->symbols_count].name =
336              string_table + nlist.n_un.n_strx;
337          baton->symbols_count++;
338        }
339      }
340
341      for (int i = 0; i < exported_syms_count; i++) {
342        struct nlist_64 nlist;
343        memset(&nlist, 0, sizeof(struct nlist_64));
344        if (is_64bit) {
345          memcpy(&nlist, exported_syms + (i * nlist_size),
346                 sizeof(struct nlist_64));
347        } else {
348          struct nlist nlist_32;
349          memcpy(&nlist_32, exported_syms + (i * nlist_size),
350                 sizeof(struct nlist));
351          nlist.n_un.n_strx = nlist_32.n_un.n_strx;
352          nlist.n_type = nlist_32.n_type;
353          nlist.n_sect = nlist_32.n_sect;
354          nlist.n_desc = nlist_32.n_desc;
355          nlist.n_value = nlist_32.n_value;
356        }
357        if ((nlist.n_type & N_STAB) == 0 &&
358            ((nlist.n_type & N_EXT) == 1 ||
359             ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
360            nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
361          baton->symbols[baton->symbols_count].file_address = nlist.n_value;
362          if (baton->cputype == CPU_TYPE_ARM)
363            baton->symbols[baton->symbols_count].file_address =
364                baton->symbols[baton->symbols_count].file_address & ~1;
365          baton->symbols[baton->symbols_count].name =
366              string_table + nlist.n_un.n_strx;
367          baton->symbols_count++;
368        }
369      }
370
371      qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
372            symbol_compare);
373    }
374
375    if (*lc_cmd == LC_FUNCTION_STARTS) {
376      struct linkedit_data_command function_starts_cmd;
377      memcpy(&function_starts_cmd, offset,
378             sizeof(struct linkedit_data_command));
379
380      uint8_t *funcstarts_offset =
381          baton->mach_header_start + function_starts_cmd.dataoff;
382      uint8_t *function_end = funcstarts_offset + function_starts_cmd.datasize;
383      int count = 0;
384
385      while (funcstarts_offset < function_end) {
386        if (read_leb128(&funcstarts_offset) != 0) {
387          count++;
388        }
389      }
390
391      baton->function_start_addresses =
392          (uint64_t *)malloc(sizeof(uint64_t) * count);
393      baton->function_start_addresses_count = count;
394
395      funcstarts_offset =
396          baton->mach_header_start + function_starts_cmd.dataoff;
397      uint64_t current_pc = baton->text_segment_vmaddr;
398      int i = 0;
399      while (funcstarts_offset < function_end) {
400        uint64_t func_start = read_leb128(&funcstarts_offset);
401        if (func_start != 0) {
402          current_pc += func_start;
403          baton->function_start_addresses[i++] = current_pc;
404        }
405      }
406    }
407
408    offset = start_of_this_load_cmd + *lc_cmdsize;
409    cur_cmd++;
410  }
411
412  // Augment the symbol table with the function starts table -- adding symbol
413  // entries
414  // for functions that were stripped.
415
416  int unnamed_functions_to_add = 0;
417  for (int i = 0; i < baton->function_start_addresses_count; i++) {
418    struct symbol search_key;
419    search_key.file_address = baton->function_start_addresses[i];
420    if (baton->cputype == CPU_TYPE_ARM)
421      search_key.file_address = search_key.file_address & ~1;
422    struct symbol *sym =
423        bsearch(&search_key, baton->symbols, baton->symbols_count,
424                sizeof(struct symbol), symbol_compare);
425    if (sym == NULL)
426      unnamed_functions_to_add++;
427  }
428
429  baton->symbols = (struct symbol *)realloc(
430      baton->symbols, sizeof(struct symbol) *
431                          (baton->symbols_count + unnamed_functions_to_add));
432
433  int current_unnamed_symbol = 1;
434  int number_symbols_added = 0;
435  for (int i = 0; i < baton->function_start_addresses_count; i++) {
436    struct symbol search_key;
437    search_key.file_address = baton->function_start_addresses[i];
438    if (baton->cputype == CPU_TYPE_ARM)
439      search_key.file_address = search_key.file_address & ~1;
440    struct symbol *sym =
441        bsearch(&search_key, baton->symbols, baton->symbols_count,
442                sizeof(struct symbol), symbol_compare);
443    if (sym == NULL) {
444      char *name;
445      asprintf(&name, "unnamed function #%d", current_unnamed_symbol++);
446      baton->symbols[baton->symbols_count + number_symbols_added].file_address =
447          baton->function_start_addresses[i];
448      baton->symbols[baton->symbols_count + number_symbols_added].name = name;
449      number_symbols_added++;
450    }
451  }
452  baton->symbols_count += number_symbols_added;
453  qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
454        symbol_compare);
455
456  //    printf ("function start addresses\n");
457  //    for (int i = 0; i < baton->function_start_addresses_count; i++)
458  //    {
459  //        printf ("0x%012llx\n", baton->function_start_addresses[i]);
460  //    }
461
462  //    printf ("symbol table names & addresses\n");
463  //    for (int i = 0; i < baton->symbols_count; i++)
464  //    {
465  //        printf ("0x%012llx %s\n", baton->symbols[i].file_address,
466  //        baton->symbols[i].name);
467  //    }
468}
469
470void print_encoding_x86_64(struct baton baton, uint8_t *function_start,
471                           uint32_t encoding) {
472  int mode = encoding & UNWIND_X86_64_MODE_MASK;
473  switch (mode) {
474  case UNWIND_X86_64_MODE_RBP_FRAME: {
475    printf("frame func: CFA is rbp+%d ", 16);
476    printf(" rip=[CFA-8] rbp=[CFA-16]");
477    uint32_t saved_registers_offset =
478        EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
479
480    uint32_t saved_registers_locations =
481        EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
482
483    saved_registers_offset += 2;
484
485    for (int i = 0; i < 5; i++) {
486      switch (saved_registers_locations & 0x7) {
487      case UNWIND_X86_64_REG_NONE:
488        break;
489      case UNWIND_X86_64_REG_RBX:
490        printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
491        break;
492      case UNWIND_X86_64_REG_R12:
493        printf(" r12=[CFA-%d]", saved_registers_offset * 8);
494        break;
495      case UNWIND_X86_64_REG_R13:
496        printf(" r13=[CFA-%d]", saved_registers_offset * 8);
497        break;
498      case UNWIND_X86_64_REG_R14:
499        printf(" r14=[CFA-%d]", saved_registers_offset * 8);
500        break;
501      case UNWIND_X86_64_REG_R15:
502        printf(" r15=[CFA-%d]", saved_registers_offset * 8);
503        break;
504      }
505      saved_registers_offset--;
506      saved_registers_locations >>= 3;
507    }
508  } break;
509
510  case UNWIND_X86_64_MODE_STACK_IND:
511  case UNWIND_X86_64_MODE_STACK_IMMD: {
512    uint32_t stack_size =
513        EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
514    uint32_t register_count =
515        EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
516    uint32_t permutation =
517        EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
518
519    if (mode == UNWIND_X86_64_MODE_STACK_IND && function_start) {
520      uint32_t stack_adjust =
521          EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
522
523      // offset into the function instructions; 0 == beginning of first
524      // instruction
525      uint32_t offset_to_subl_insn =
526          EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
527
528      stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
529
530      stack_size += stack_adjust * 8;
531
532      printf("large stack ");
533    }
534
535    if (mode == UNWIND_X86_64_MODE_STACK_IND) {
536      printf("frameless function: stack size %d, register count %d ",
537             stack_size * 8, register_count);
538    } else {
539      printf("frameless function: stack size %d, register count %d ",
540             stack_size, register_count);
541    }
542
543    if (register_count == 0) {
544      printf(" no registers saved");
545    } else {
546
547      // We need to include (up to) 6 registers in 10 bits.
548      // That would be 18 bits if we just used 3 bits per reg to indicate
549      // the order they're saved on the stack.
550      //
551      // This is done with Lehmer code permutation, e.g. see
552      // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
553      int permunreg[6];
554
555      // This decodes the variable-base number in the 10 bits
556      // and gives us the Lehmer code sequence which can then
557      // be decoded.
558
559      switch (register_count) {
560      case 6:
561        permunreg[0] = permutation / 120; // 120 == 5!
562        permutation -= (permunreg[0] * 120);
563        permunreg[1] = permutation / 24; // 24 == 4!
564        permutation -= (permunreg[1] * 24);
565        permunreg[2] = permutation / 6; // 6 == 3!
566        permutation -= (permunreg[2] * 6);
567        permunreg[3] = permutation / 2; // 2 == 2!
568        permutation -= (permunreg[3] * 2);
569        permunreg[4] = permutation; // 1 == 1!
570        permunreg[5] = 0;
571        break;
572      case 5:
573        permunreg[0] = permutation / 120;
574        permutation -= (permunreg[0] * 120);
575        permunreg[1] = permutation / 24;
576        permutation -= (permunreg[1] * 24);
577        permunreg[2] = permutation / 6;
578        permutation -= (permunreg[2] * 6);
579        permunreg[3] = permutation / 2;
580        permutation -= (permunreg[3] * 2);
581        permunreg[4] = permutation;
582        break;
583      case 4:
584        permunreg[0] = permutation / 60;
585        permutation -= (permunreg[0] * 60);
586        permunreg[1] = permutation / 12;
587        permutation -= (permunreg[1] * 12);
588        permunreg[2] = permutation / 3;
589        permutation -= (permunreg[2] * 3);
590        permunreg[3] = permutation;
591        break;
592      case 3:
593        permunreg[0] = permutation / 20;
594        permutation -= (permunreg[0] * 20);
595        permunreg[1] = permutation / 4;
596        permutation -= (permunreg[1] * 4);
597        permunreg[2] = permutation;
598        break;
599      case 2:
600        permunreg[0] = permutation / 5;
601        permutation -= (permunreg[0] * 5);
602        permunreg[1] = permutation;
603        break;
604      case 1:
605        permunreg[0] = permutation;
606        break;
607      }
608
609      // Decode the Lehmer code for this permutation of
610      // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
611
612      int registers[6];
613      bool used[7] = {false, false, false, false, false, false, false};
614      for (int i = 0; i < register_count; i++) {
615        int renum = 0;
616        for (int j = 1; j < 7; j++) {
617          if (used[j] == false) {
618            if (renum == permunreg[i]) {
619              registers[i] = j;
620              used[j] = true;
621              break;
622            }
623            renum++;
624          }
625        }
626      }
627
628      if (mode == UNWIND_X86_64_MODE_STACK_IND) {
629        printf(" CFA is rsp+%d ", stack_size);
630      } else {
631        printf(" CFA is rsp+%d ", stack_size * 8);
632      }
633
634      uint32_t saved_registers_offset = 1;
635      printf(" rip=[CFA-%d]", saved_registers_offset * 8);
636      saved_registers_offset++;
637
638      for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
639        switch (registers[i]) {
640        case UNWIND_X86_64_REG_NONE:
641          break;
642        case UNWIND_X86_64_REG_RBX:
643          printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
644          saved_registers_offset++;
645          break;
646        case UNWIND_X86_64_REG_R12:
647          printf(" r12=[CFA-%d]", saved_registers_offset * 8);
648          saved_registers_offset++;
649          break;
650        case UNWIND_X86_64_REG_R13:
651          printf(" r13=[CFA-%d]", saved_registers_offset * 8);
652          saved_registers_offset++;
653          break;
654        case UNWIND_X86_64_REG_R14:
655          printf(" r14=[CFA-%d]", saved_registers_offset * 8);
656          saved_registers_offset++;
657          break;
658        case UNWIND_X86_64_REG_R15:
659          printf(" r15=[CFA-%d]", saved_registers_offset * 8);
660          saved_registers_offset++;
661          break;
662        case UNWIND_X86_64_REG_RBP:
663          printf(" rbp=[CFA-%d]", saved_registers_offset * 8);
664          saved_registers_offset++;
665          break;
666        }
667      }
668    }
669
670  } break;
671
672  case UNWIND_X86_64_MODE_DWARF: {
673    uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
674    printf(
675        "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
676        ")",
677        dwarf_offset, dwarf_offset + baton.eh_section_file_address);
678  } break;
679
680  case 0: {
681    printf(" no unwind information");
682  } break;
683  }
684}
685
686void print_encoding_i386(struct baton baton, uint8_t *function_start,
687                         uint32_t encoding) {
688  int mode = encoding & UNWIND_X86_MODE_MASK;
689  switch (mode) {
690  case UNWIND_X86_MODE_EBP_FRAME: {
691    printf("frame func: CFA is ebp+%d ", 8);
692    printf(" eip=[CFA-4] ebp=[CFA-8]");
693    uint32_t saved_registers_offset =
694        EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_OFFSET);
695
696    uint32_t saved_registers_locations =
697        EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_REGISTERS);
698
699    saved_registers_offset += 2;
700
701    for (int i = 0; i < 5; i++) {
702      switch (saved_registers_locations & 0x7) {
703      case UNWIND_X86_REG_NONE:
704        break;
705      case UNWIND_X86_REG_EBX:
706        printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
707        break;
708      case UNWIND_X86_REG_ECX:
709        printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
710        break;
711      case UNWIND_X86_REG_EDX:
712        printf(" edx=[CFA-%d]", saved_registers_offset * 4);
713        break;
714      case UNWIND_X86_REG_EDI:
715        printf(" edi=[CFA-%d]", saved_registers_offset * 4);
716        break;
717      case UNWIND_X86_REG_ESI:
718        printf(" esi=[CFA-%d]", saved_registers_offset * 4);
719        break;
720      }
721      saved_registers_offset--;
722      saved_registers_locations >>= 3;
723    }
724  } break;
725
726  case UNWIND_X86_MODE_STACK_IND:
727  case UNWIND_X86_MODE_STACK_IMMD: {
728    uint32_t stack_size =
729        EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
730    uint32_t register_count =
731        EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
732    uint32_t permutation =
733        EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
734
735    if (mode == UNWIND_X86_MODE_STACK_IND && function_start) {
736      uint32_t stack_adjust =
737          EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
738
739      // offset into the function instructions; 0 == beginning of first
740      // instruction
741      uint32_t offset_to_subl_insn =
742          EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
743
744      stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
745
746      stack_size += stack_adjust * 4;
747
748      printf("large stack ");
749    }
750
751    if (mode == UNWIND_X86_MODE_STACK_IND) {
752      printf("frameless function: stack size %d, register count %d ",
753             stack_size, register_count);
754    } else {
755      printf("frameless function: stack size %d, register count %d ",
756             stack_size * 4, register_count);
757    }
758
759    if (register_count == 0) {
760      printf(" no registers saved");
761    } else {
762
763      // We need to include (up to) 6 registers in 10 bits.
764      // That would be 18 bits if we just used 3 bits per reg to indicate
765      // the order they're saved on the stack.
766      //
767      // This is done with Lehmer code permutation, e.g. see
768      // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
769      int permunreg[6];
770
771      // This decodes the variable-base number in the 10 bits
772      // and gives us the Lehmer code sequence which can then
773      // be decoded.
774
775      switch (register_count) {
776      case 6:
777        permunreg[0] = permutation / 120; // 120 == 5!
778        permutation -= (permunreg[0] * 120);
779        permunreg[1] = permutation / 24; // 24 == 4!
780        permutation -= (permunreg[1] * 24);
781        permunreg[2] = permutation / 6; // 6 == 3!
782        permutation -= (permunreg[2] * 6);
783        permunreg[3] = permutation / 2; // 2 == 2!
784        permutation -= (permunreg[3] * 2);
785        permunreg[4] = permutation; // 1 == 1!
786        permunreg[5] = 0;
787        break;
788      case 5:
789        permunreg[0] = permutation / 120;
790        permutation -= (permunreg[0] * 120);
791        permunreg[1] = permutation / 24;
792        permutation -= (permunreg[1] * 24);
793        permunreg[2] = permutation / 6;
794        permutation -= (permunreg[2] * 6);
795        permunreg[3] = permutation / 2;
796        permutation -= (permunreg[3] * 2);
797        permunreg[4] = permutation;
798        break;
799      case 4:
800        permunreg[0] = permutation / 60;
801        permutation -= (permunreg[0] * 60);
802        permunreg[1] = permutation / 12;
803        permutation -= (permunreg[1] * 12);
804        permunreg[2] = permutation / 3;
805        permutation -= (permunreg[2] * 3);
806        permunreg[3] = permutation;
807        break;
808      case 3:
809        permunreg[0] = permutation / 20;
810        permutation -= (permunreg[0] * 20);
811        permunreg[1] = permutation / 4;
812        permutation -= (permunreg[1] * 4);
813        permunreg[2] = permutation;
814        break;
815      case 2:
816        permunreg[0] = permutation / 5;
817        permutation -= (permunreg[0] * 5);
818        permunreg[1] = permutation;
819        break;
820      case 1:
821        permunreg[0] = permutation;
822        break;
823      }
824
825      // Decode the Lehmer code for this permutation of
826      // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
827
828      int registers[6];
829      bool used[7] = {false, false, false, false, false, false, false};
830      for (int i = 0; i < register_count; i++) {
831        int renum = 0;
832        for (int j = 1; j < 7; j++) {
833          if (used[j] == false) {
834            if (renum == permunreg[i]) {
835              registers[i] = j;
836              used[j] = true;
837              break;
838            }
839            renum++;
840          }
841        }
842      }
843
844      if (mode == UNWIND_X86_MODE_STACK_IND) {
845        printf(" CFA is esp+%d ", stack_size);
846      } else {
847        printf(" CFA is esp+%d ", stack_size * 4);
848      }
849
850      uint32_t saved_registers_offset = 1;
851      printf(" eip=[CFA-%d]", saved_registers_offset * 4);
852      saved_registers_offset++;
853
854      for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
855        switch (registers[i]) {
856        case UNWIND_X86_REG_NONE:
857          break;
858        case UNWIND_X86_REG_EBX:
859          printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
860          saved_registers_offset++;
861          break;
862        case UNWIND_X86_REG_ECX:
863          printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
864          saved_registers_offset++;
865          break;
866        case UNWIND_X86_REG_EDX:
867          printf(" edx=[CFA-%d]", saved_registers_offset * 4);
868          saved_registers_offset++;
869          break;
870        case UNWIND_X86_REG_EDI:
871          printf(" edi=[CFA-%d]", saved_registers_offset * 4);
872          saved_registers_offset++;
873          break;
874        case UNWIND_X86_REG_ESI:
875          printf(" esi=[CFA-%d]", saved_registers_offset * 4);
876          saved_registers_offset++;
877          break;
878        case UNWIND_X86_REG_EBP:
879          printf(" ebp=[CFA-%d]", saved_registers_offset * 4);
880          saved_registers_offset++;
881          break;
882        }
883      }
884    }
885
886  } break;
887
888  case UNWIND_X86_MODE_DWARF: {
889    uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
890    printf(
891        "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
892        ")",
893        dwarf_offset, dwarf_offset + baton.eh_section_file_address);
894  } break;
895
896  case 0: {
897    printf(" no unwind information");
898  } break;
899  }
900}
901
902void print_encoding_arm64(struct baton baton, uint8_t *function_start,
903                          uint32_t encoding) {
904  const int wordsize = 8;
905  int mode = encoding & UNWIND_ARM64_MODE_MASK;
906  switch (mode) {
907  case UNWIND_ARM64_MODE_FRAME: {
908    printf("frame func: CFA is fp+%d ", 16);
909    printf(" pc=[CFA-8] fp=[CFA-16]");
910    int reg_pairs_saved_count = 1;
911    uint32_t saved_register_bits = encoding & 0xfff;
912    if (saved_register_bits & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
913      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
914      cfa_offset -= wordsize;
915      printf(" x19=[CFA%d]", cfa_offset);
916      cfa_offset -= wordsize;
917      printf(" x20=[CFA%d]", cfa_offset);
918      reg_pairs_saved_count++;
919    }
920    if (saved_register_bits & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
921      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
922      cfa_offset -= wordsize;
923      printf(" x21=[CFA%d]", cfa_offset);
924      cfa_offset -= wordsize;
925      printf(" x22=[CFA%d]", cfa_offset);
926      reg_pairs_saved_count++;
927    }
928    if (saved_register_bits & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
929      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
930      cfa_offset -= wordsize;
931      printf(" x23=[CFA%d]", cfa_offset);
932      cfa_offset -= wordsize;
933      printf(" x24=[CFA%d]", cfa_offset);
934      reg_pairs_saved_count++;
935    }
936    if (saved_register_bits & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
937      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
938      cfa_offset -= wordsize;
939      printf(" x25=[CFA%d]", cfa_offset);
940      cfa_offset -= wordsize;
941      printf(" x26=[CFA%d]", cfa_offset);
942      reg_pairs_saved_count++;
943    }
944    if (saved_register_bits & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
945      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
946      cfa_offset -= wordsize;
947      printf(" x27=[CFA%d]", cfa_offset);
948      cfa_offset -= wordsize;
949      printf(" x28=[CFA%d]", cfa_offset);
950      reg_pairs_saved_count++;
951    }
952    if (saved_register_bits & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
953      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
954      cfa_offset -= wordsize;
955      printf(" d8=[CFA%d]", cfa_offset);
956      cfa_offset -= wordsize;
957      printf(" d9=[CFA%d]", cfa_offset);
958      reg_pairs_saved_count++;
959    }
960    if (saved_register_bits & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
961      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
962      cfa_offset -= wordsize;
963      printf(" d10=[CFA%d]", cfa_offset);
964      cfa_offset -= wordsize;
965      printf(" d11=[CFA%d]", cfa_offset);
966      reg_pairs_saved_count++;
967    }
968    if (saved_register_bits & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
969      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
970      cfa_offset -= wordsize;
971      printf(" d12=[CFA%d]", cfa_offset);
972      cfa_offset -= wordsize;
973      printf(" d13=[CFA%d]", cfa_offset);
974      reg_pairs_saved_count++;
975    }
976    if (saved_register_bits & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
977      int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
978      cfa_offset -= wordsize;
979      printf(" d14=[CFA%d]", cfa_offset);
980      cfa_offset -= wordsize;
981      printf(" d15=[CFA%d]", cfa_offset);
982      reg_pairs_saved_count++;
983    }
984
985  } break;
986
987  case UNWIND_ARM64_MODE_FRAMELESS: {
988    uint32_t stack_size = encoding & UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK;
989    printf("frameless function: stack size %d ", stack_size * 16);
990
991  } break;
992
993  case UNWIND_ARM64_MODE_DWARF: {
994    uint32_t dwarf_offset = encoding & UNWIND_ARM64_DWARF_SECTION_OFFSET;
995    printf(
996        "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
997        ")",
998        dwarf_offset, dwarf_offset + baton.eh_section_file_address);
999  } break;
1000
1001  case 0: {
1002    printf(" no unwind information");
1003  } break;
1004  }
1005}
1006
1007void print_encoding_armv7(struct baton baton, uint8_t *function_start,
1008                          uint32_t encoding) {
1009  const int wordsize = 4;
1010  int mode = encoding & UNWIND_ARM_MODE_MASK;
1011  switch (mode) {
1012  case UNWIND_ARM_MODE_FRAME_D:
1013  case UNWIND_ARM_MODE_FRAME: {
1014    int stack_adjust =
1015        EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_STACK_ADJUST_MASK) * wordsize;
1016
1017    printf("frame func: CFA is fp+%d ", (2 * wordsize) + stack_adjust);
1018    int cfa_offset = -stack_adjust;
1019
1020    cfa_offset -= wordsize;
1021    printf(" pc=[CFA%d]", cfa_offset);
1022    cfa_offset -= wordsize;
1023    printf(" fp=[CFA%d]", cfa_offset);
1024
1025    uint32_t saved_register_bits = encoding & 0xff;
1026    if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R6) {
1027      cfa_offset -= wordsize;
1028      printf(" r6=[CFA%d]", cfa_offset);
1029    }
1030    if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R5) {
1031      cfa_offset -= wordsize;
1032      printf(" r5=[CFA%d]", cfa_offset);
1033    }
1034    if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R4) {
1035      cfa_offset -= wordsize;
1036      printf(" r4=[CFA%d]", cfa_offset);
1037    }
1038    if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R12) {
1039      cfa_offset -= wordsize;
1040      printf(" r12=[CFA%d]", cfa_offset);
1041    }
1042    if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R11) {
1043      cfa_offset -= wordsize;
1044      printf(" r11=[CFA%d]", cfa_offset);
1045    }
1046    if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R10) {
1047      cfa_offset -= wordsize;
1048      printf(" r10=[CFA%d]", cfa_offset);
1049    }
1050    if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R9) {
1051      cfa_offset -= wordsize;
1052      printf(" r9=[CFA%d]", cfa_offset);
1053    }
1054    if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R8) {
1055      cfa_offset -= wordsize;
1056      printf(" r8=[CFA%d]", cfa_offset);
1057    }
1058
1059    if (mode == UNWIND_ARM_MODE_FRAME_D) {
1060      uint32_t d_reg_bits =
1061          EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_D_REG_COUNT_MASK);
1062      switch (d_reg_bits) {
1063      case 0:
1064        // vpush {d8}
1065        cfa_offset -= 8;
1066        printf(" d8=[CFA%d]", cfa_offset);
1067        break;
1068      case 1:
1069        // vpush {d10}
1070        // vpush {d8}
1071        cfa_offset -= 8;
1072        printf(" d10=[CFA%d]", cfa_offset);
1073        cfa_offset -= 8;
1074        printf(" d8=[CFA%d]", cfa_offset);
1075        break;
1076      case 2:
1077        // vpush {d12}
1078        // vpush {d10}
1079        // vpush {d8}
1080        cfa_offset -= 8;
1081        printf(" d12=[CFA%d]", cfa_offset);
1082        cfa_offset -= 8;
1083        printf(" d10=[CFA%d]", cfa_offset);
1084        cfa_offset -= 8;
1085        printf(" d8=[CFA%d]", cfa_offset);
1086        break;
1087      case 3:
1088        // vpush {d14}
1089        // vpush {d12}
1090        // vpush {d10}
1091        // vpush {d8}
1092        cfa_offset -= 8;
1093        printf(" d14=[CFA%d]", cfa_offset);
1094        cfa_offset -= 8;
1095        printf(" d12=[CFA%d]", cfa_offset);
1096        cfa_offset -= 8;
1097        printf(" d10=[CFA%d]", cfa_offset);
1098        cfa_offset -= 8;
1099        printf(" d8=[CFA%d]", cfa_offset);
1100        break;
1101      case 4:
1102        // vpush {d14}
1103        // vpush {d12}
1104        // sp = (sp - 24) & (-16);
1105        // vst   {d8, d9, d10}
1106        printf(" d14, d12, d10, d9, d8");
1107        break;
1108      case 5:
1109        // vpush {d14}
1110        // sp = (sp - 40) & (-16);
1111        // vst   {d8, d9, d10, d11}
1112        // vst   {d12}
1113        printf(" d14, d11, d10, d9, d8, d12");
1114        break;
1115      case 6:
1116        // sp = (sp - 56) & (-16);
1117        // vst   {d8, d9, d10, d11}
1118        // vst   {d12, d13, d14}
1119        printf(" d11, d10, d9, d8, d14, d13, d12");
1120        break;
1121      case 7:
1122        // sp = (sp - 64) & (-16);
1123        // vst   {d8, d9, d10, d11}
1124        // vst   {d12, d13, d14, d15}
1125        printf(" d11, d10, d9, d8, d15, d14, d13, d12");
1126        break;
1127      }
1128    }
1129  } break;
1130
1131  case UNWIND_ARM_MODE_DWARF: {
1132    uint32_t dwarf_offset = encoding & UNWIND_ARM_DWARF_SECTION_OFFSET;
1133    printf(
1134        "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
1135        ")",
1136        dwarf_offset, dwarf_offset + baton.eh_section_file_address);
1137  } break;
1138
1139  case 0: {
1140    printf(" no unwind information");
1141  } break;
1142  }
1143}
1144
1145void print_encoding(struct baton baton, uint8_t *function_start,
1146                    uint32_t encoding) {
1147
1148  if (baton.cputype == CPU_TYPE_X86_64) {
1149    print_encoding_x86_64(baton, function_start, encoding);
1150  } else if (baton.cputype == CPU_TYPE_I386) {
1151    print_encoding_i386(baton, function_start, encoding);
1152  } else if (baton.cputype == CPU_TYPE_ARM64 || baton.cputype == CPU_TYPE_ARM64_32) {
1153    print_encoding_arm64(baton, function_start, encoding);
1154  } else if (baton.cputype == CPU_TYPE_ARM) {
1155    print_encoding_armv7(baton, function_start, encoding);
1156  } else {
1157    printf(" -- unsupported encoding arch -- ");
1158  }
1159}
1160
1161void print_function_encoding(struct baton baton, uint32_t idx,
1162                             uint32_t encoding, uint32_t entry_encoding_index,
1163                             uint32_t entry_func_offset) {
1164
1165  char *entry_encoding_index_str = "";
1166  if (entry_encoding_index != (uint32_t)-1) {
1167    asprintf(&entry_encoding_index_str, ", encoding #%d", entry_encoding_index);
1168  } else {
1169    asprintf(&entry_encoding_index_str, "");
1170  }
1171
1172  uint64_t file_address = baton.first_level_index_entry.functionOffset +
1173                          entry_func_offset + baton.text_segment_vmaddr;
1174
1175  if (baton.cputype == CPU_TYPE_ARM)
1176    file_address = file_address & ~1;
1177
1178  printf(
1179      "    func [%d] offset %d (file addr 0x%" PRIx64 ")%s, encoding is 0x%x",
1180      idx, entry_func_offset, file_address, entry_encoding_index_str, encoding);
1181
1182  struct symbol *symbol = NULL;
1183  for (int i = 0; i < baton.symbols_count; i++) {
1184    if (i == baton.symbols_count - 1 &&
1185        baton.symbols[i].file_address <= file_address) {
1186      symbol = &(baton.symbols[i]);
1187      break;
1188    } else {
1189      if (baton.symbols[i].file_address <= file_address &&
1190          baton.symbols[i + 1].file_address > file_address) {
1191        symbol = &(baton.symbols[i]);
1192        break;
1193      }
1194    }
1195  }
1196
1197  printf("\n         ");
1198  if (symbol) {
1199    int offset = file_address - symbol->file_address;
1200
1201    // FIXME this is a poor heuristic - if we're greater than 16 bytes past the
1202    // start of the function, this is the unwind info for a stripped function.
1203    // In reality the compact unwind entry may not line up exactly with the
1204    // function bounds.
1205    if (offset >= 0) {
1206      printf("name: %s", symbol->name);
1207      if (offset > 0) {
1208        printf(" + %d", offset);
1209      }
1210    }
1211    printf("\n         ");
1212  }
1213
1214  print_encoding(baton, baton.mach_header_start +
1215                            baton.first_level_index_entry.functionOffset +
1216                            baton.text_section_file_offset + entry_func_offset,
1217                 encoding);
1218
1219  bool has_lsda = encoding & UNWIND_HAS_LSDA;
1220
1221  if (has_lsda) {
1222    uint32_t func_offset =
1223        entry_func_offset + baton.first_level_index_entry.functionOffset;
1224
1225    int lsda_entry_number = -1;
1226
1227    uint32_t low = 0;
1228    uint32_t high = (baton.lsda_array_end - baton.lsda_array_start) /
1229                    sizeof(struct unwind_info_section_header_lsda_index_entry);
1230
1231    while (low < high) {
1232      uint32_t mid = (low + high) / 2;
1233
1234      uint8_t *mid_lsda_entry_addr =
1235          (baton.lsda_array_start +
1236           (mid * sizeof(struct unwind_info_section_header_lsda_index_entry)));
1237      struct unwind_info_section_header_lsda_index_entry mid_lsda_entry;
1238      memcpy(&mid_lsda_entry, mid_lsda_entry_addr,
1239             sizeof(struct unwind_info_section_header_lsda_index_entry));
1240      if (mid_lsda_entry.functionOffset == func_offset) {
1241        lsda_entry_number =
1242            (mid_lsda_entry_addr - baton.lsda_array_start) /
1243            sizeof(struct unwind_info_section_header_lsda_index_entry);
1244        break;
1245      } else if (mid_lsda_entry.functionOffset < func_offset) {
1246        low = mid + 1;
1247      } else {
1248        high = mid;
1249      }
1250    }
1251
1252    if (lsda_entry_number != -1) {
1253      printf(", LSDA entry #%d", lsda_entry_number);
1254    } else {
1255      printf(", LSDA entry not found");
1256    }
1257  }
1258
1259  uint32_t pers_idx = EXTRACT_BITS(encoding, UNWIND_PERSONALITY_MASK);
1260  if (pers_idx != 0) {
1261    pers_idx--; // Change 1-based to 0-based index
1262    printf(", personality entry #%d", pers_idx);
1263  }
1264
1265  printf("\n");
1266}
1267
1268void print_second_level_index_regular(struct baton baton) {
1269  uint8_t *page_entries =
1270      baton.compact_unwind_start +
1271      baton.first_level_index_entry.secondLevelPagesSectionOffset +
1272      baton.regular_second_level_page_header.entryPageOffset;
1273  uint32_t entries_count = baton.regular_second_level_page_header.entryCount;
1274
1275  uint8_t *offset = page_entries;
1276
1277  uint32_t idx = 0;
1278  while (idx < entries_count) {
1279    uint32_t func_offset = *((uint32_t *)(offset));
1280    uint32_t encoding = *((uint32_t *)(offset + 4));
1281
1282    // UNWIND_SECOND_LEVEL_REGULAR entries have a funcOffset which includes the
1283    // functionOffset from the containing index table already.
1284    // UNWIND_SECOND_LEVEL_COMPRESSED
1285    // entries only have the offset from the containing index table
1286    // functionOffset.
1287    // So strip off the containing index table functionOffset value here so they
1288    // can
1289    // be treated the same at the lower layers.
1290
1291    print_function_encoding(baton, idx, encoding, (uint32_t)-1,
1292                            func_offset -
1293                                baton.first_level_index_entry.functionOffset);
1294    idx++;
1295    offset += 8;
1296  }
1297}
1298
1299void print_second_level_index_compressed(struct baton baton) {
1300  uint8_t *this_index =
1301      baton.compact_unwind_start +
1302      baton.first_level_index_entry.secondLevelPagesSectionOffset;
1303  uint8_t *start_of_entries =
1304      this_index + baton.compressed_second_level_page_header.entryPageOffset;
1305  uint8_t *offset = start_of_entries;
1306  for (uint16_t idx = 0;
1307       idx < baton.compressed_second_level_page_header.entryCount; idx++) {
1308    uint32_t entry = *((uint32_t *)offset);
1309    offset += 4;
1310    uint32_t encoding;
1311
1312    uint32_t entry_encoding_index =
1313        UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry);
1314    uint32_t entry_func_offset =
1315        UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry);
1316
1317    if (entry_encoding_index < baton.unwind_header.commonEncodingsArrayCount) {
1318      // encoding is in common table in section header
1319      encoding =
1320          *((uint32_t *)(baton.compact_unwind_start +
1321                         baton.unwind_header.commonEncodingsArraySectionOffset +
1322                         (entry_encoding_index * sizeof(uint32_t))));
1323    } else {
1324      // encoding is in page specific table
1325      uint32_t page_encoding_index =
1326          entry_encoding_index - baton.unwind_header.commonEncodingsArrayCount;
1327      encoding = *((uint32_t *)(this_index +
1328                                baton.compressed_second_level_page_header
1329                                    .encodingsPageOffset +
1330                                (page_encoding_index * sizeof(uint32_t))));
1331    }
1332
1333    print_function_encoding(baton, idx, encoding, entry_encoding_index,
1334                            entry_func_offset);
1335  }
1336}
1337
1338void print_second_level_index(struct baton baton) {
1339  uint8_t *index_start =
1340      baton.compact_unwind_start +
1341      baton.first_level_index_entry.secondLevelPagesSectionOffset;
1342
1343  if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_REGULAR) {
1344    struct unwind_info_regular_second_level_page_header header;
1345    memcpy(&header, index_start,
1346           sizeof(struct unwind_info_regular_second_level_page_header));
1347    printf(
1348        "  UNWIND_SECOND_LEVEL_REGULAR #%d entryPageOffset %d, entryCount %d\n",
1349        baton.current_index_table_number, header.entryPageOffset,
1350        header.entryCount);
1351    baton.regular_second_level_page_header = header;
1352    print_second_level_index_regular(baton);
1353  }
1354
1355  if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_COMPRESSED) {
1356    struct unwind_info_compressed_second_level_page_header header;
1357    memcpy(&header, index_start,
1358           sizeof(struct unwind_info_compressed_second_level_page_header));
1359    printf("  UNWIND_SECOND_LEVEL_COMPRESSED #%d entryPageOffset %d, "
1360           "entryCount %d, encodingsPageOffset %d, encodingsCount %d\n",
1361           baton.current_index_table_number, header.entryPageOffset,
1362           header.entryCount, header.encodingsPageOffset,
1363           header.encodingsCount);
1364    baton.compressed_second_level_page_header = header;
1365    print_second_level_index_compressed(baton);
1366  }
1367}
1368
1369void print_index_sections(struct baton baton) {
1370  uint8_t *index_section_offset =
1371      baton.compact_unwind_start + baton.unwind_header.indexSectionOffset;
1372  uint32_t index_count = baton.unwind_header.indexCount;
1373
1374  uint32_t cur_idx = 0;
1375
1376  uint8_t *offset = index_section_offset;
1377  while (cur_idx < index_count) {
1378    baton.current_index_table_number = cur_idx;
1379    struct unwind_info_section_header_index_entry index_entry;
1380    memcpy(&index_entry, offset,
1381           sizeof(struct unwind_info_section_header_index_entry));
1382    printf("index section #%d: functionOffset %d, "
1383           "secondLevelPagesSectionOffset %d, lsdaIndexArraySectionOffset %d\n",
1384           cur_idx, index_entry.functionOffset,
1385           index_entry.secondLevelPagesSectionOffset,
1386           index_entry.lsdaIndexArraySectionOffset);
1387
1388    // secondLevelPagesSectionOffset == 0 means this is a sentinel entry
1389    if (index_entry.secondLevelPagesSectionOffset != 0) {
1390      struct unwind_info_section_header_index_entry next_index_entry;
1391      memcpy(&next_index_entry,
1392             offset + sizeof(struct unwind_info_section_header_index_entry),
1393             sizeof(struct unwind_info_section_header_index_entry));
1394
1395      baton.lsda_array_start =
1396          baton.compact_unwind_start + index_entry.lsdaIndexArraySectionOffset;
1397      baton.lsda_array_end = baton.compact_unwind_start +
1398                             next_index_entry.lsdaIndexArraySectionOffset;
1399
1400      uint8_t *lsda_entry_offset = baton.lsda_array_start;
1401      uint32_t lsda_count = 0;
1402      while (lsda_entry_offset < baton.lsda_array_end) {
1403        struct unwind_info_section_header_lsda_index_entry lsda_entry;
1404        memcpy(&lsda_entry, lsda_entry_offset,
1405               sizeof(struct unwind_info_section_header_lsda_index_entry));
1406        uint64_t function_file_address =
1407            baton.first_level_index_entry.functionOffset +
1408            lsda_entry.functionOffset + baton.text_segment_vmaddr;
1409        uint64_t lsda_file_address =
1410            lsda_entry.lsdaOffset + baton.text_segment_vmaddr;
1411        printf("    LSDA [%d] functionOffset %d (%d) (file address 0x%" PRIx64
1412               "), lsdaOffset %d (file address 0x%" PRIx64 ")\n",
1413               lsda_count, lsda_entry.functionOffset,
1414               lsda_entry.functionOffset - index_entry.functionOffset,
1415               function_file_address, lsda_entry.lsdaOffset, lsda_file_address);
1416        lsda_count++;
1417        lsda_entry_offset +=
1418            sizeof(struct unwind_info_section_header_lsda_index_entry);
1419      }
1420
1421      printf("\n");
1422
1423      baton.first_level_index_entry = index_entry;
1424      print_second_level_index(baton);
1425    }
1426
1427    printf("\n");
1428
1429    cur_idx++;
1430    offset += sizeof(struct unwind_info_section_header_index_entry);
1431  }
1432}
1433
1434int main(int argc, char **argv) {
1435  struct stat st;
1436  char *file = argv[0];
1437  if (argc > 1)
1438    file = argv[1];
1439  int fd = open(file, O_RDONLY);
1440  if (fd == -1) {
1441    printf("Failed to open '%s'\n", file);
1442    exit(1);
1443  }
1444  fstat(fd, &st);
1445  uint8_t *file_mem =
1446      (uint8_t *)mmap(0, st.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
1447  if (file_mem == MAP_FAILED) {
1448    printf("Failed to mmap() '%s'\n", file);
1449  }
1450
1451  FILE *f = fopen("a.out", "r");
1452
1453  struct baton baton;
1454  baton.mach_header_start = file_mem;
1455  baton.symbols = NULL;
1456  baton.symbols_count = 0;
1457  baton.function_start_addresses = NULL;
1458  baton.function_start_addresses_count = 0;
1459
1460  scan_macho_load_commands(&baton);
1461
1462  if (baton.compact_unwind_start == NULL) {
1463    printf("could not find __TEXT,__unwind_info section\n");
1464    exit(1);
1465  }
1466
1467  struct unwind_info_section_header header;
1468  memcpy(&header, baton.compact_unwind_start,
1469         sizeof(struct unwind_info_section_header));
1470  printf("Header:\n");
1471  printf("  version %u\n", header.version);
1472  printf("  commonEncodingsArraySectionOffset is %d\n",
1473         header.commonEncodingsArraySectionOffset);
1474  printf("  commonEncodingsArrayCount is %d\n",
1475         header.commonEncodingsArrayCount);
1476  printf("  personalityArraySectionOffset is %d\n",
1477         header.personalityArraySectionOffset);
1478  printf("  personalityArrayCount is %d\n", header.personalityArrayCount);
1479  printf("  indexSectionOffset is %d\n", header.indexSectionOffset);
1480  printf("  indexCount is %d\n", header.indexCount);
1481
1482  uint8_t *common_encodings =
1483      baton.compact_unwind_start + header.commonEncodingsArraySectionOffset;
1484  uint32_t encoding_idx = 0;
1485  while (encoding_idx < header.commonEncodingsArrayCount) {
1486    uint32_t encoding = *((uint32_t *)common_encodings);
1487    printf("    Common Encoding [%d]: 0x%x ", encoding_idx, encoding);
1488    print_encoding(baton, NULL, encoding);
1489    printf("\n");
1490    common_encodings += sizeof(uint32_t);
1491    encoding_idx++;
1492  }
1493
1494  uint8_t *pers_arr =
1495      baton.compact_unwind_start + header.personalityArraySectionOffset;
1496  uint32_t pers_idx = 0;
1497  while (pers_idx < header.personalityArrayCount) {
1498    int32_t pers_delta = *((int32_t *)(baton.compact_unwind_start +
1499                                       header.personalityArraySectionOffset +
1500                                       (pers_idx * sizeof(uint32_t))));
1501    printf("    Personality [%d]: personality function ptr @ offset %d (file "
1502           "address 0x%" PRIx64 ")\n",
1503           pers_idx, pers_delta, baton.text_segment_vmaddr + pers_delta);
1504    pers_idx++;
1505    pers_arr += sizeof(uint32_t);
1506  }
1507
1508  printf("\n");
1509
1510  baton.unwind_header = header;
1511
1512  print_index_sections(baton);
1513
1514  return 0;
1515}
1516