1//===-- TraceHTR.cpp ------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "TraceHTR.h"
10
11#include "lldb/Symbol/Function.h"
12#include "lldb/Target/Process.h"
13#include "lldb/Target/Target.h"
14#include "llvm/Support/JSON.h"
15#include <optional>
16#include <sstream>
17#include <string>
18
19using namespace lldb_private;
20using namespace lldb;
21
22size_t HTRBlockMetadata::GetNumInstructions() const {
23  return m_num_instructions;
24}
25
26std::optional<llvm::StringRef>
27HTRBlockMetadata::GetMostFrequentlyCalledFunction() const {
28  size_t max_ncalls = 0;
29  std::optional<llvm::StringRef> max_name;
30  for (const auto &it : m_func_calls) {
31    ConstString name = it.first;
32    size_t ncalls = it.second;
33    if (ncalls > max_ncalls) {
34      max_ncalls = ncalls;
35      max_name = name.GetStringRef();
36    }
37  }
38  return max_name;
39}
40
41llvm::DenseMap<ConstString, size_t> const &
42HTRBlockMetadata::GetFunctionCalls() const {
43  return m_func_calls;
44}
45
46lldb::addr_t HTRBlockMetadata::GetFirstInstructionLoadAddress() const {
47  return m_first_instruction_load_address;
48}
49
50size_t HTRBlock::GetOffset() const { return m_offset; }
51
52size_t HTRBlock::GetSize() const { return m_size; }
53
54HTRBlockMetadata const &HTRBlock::GetMetadata() const { return m_metadata; }
55
56llvm::ArrayRef<HTRBlockLayerUP> TraceHTR::GetBlockLayers() const {
57  return m_block_layer_ups;
58}
59
60HTRInstructionLayer const &TraceHTR::GetInstructionLayer() const {
61  return *m_instruction_layer_up;
62}
63
64void TraceHTR::AddNewBlockLayer(HTRBlockLayerUP &&block_layer) {
65  m_block_layer_ups.emplace_back(std::move(block_layer));
66}
67
68size_t IHTRLayer::GetLayerId() const { return m_layer_id; }
69
70void HTRBlockLayer::AppendNewBlock(size_t block_id, HTRBlock &&block) {
71  m_block_id_trace.emplace_back(block_id);
72  m_block_defs.emplace(block_id, std::move(block));
73}
74
75void HTRBlockLayer::AppendRepeatedBlock(size_t block_id) {
76  m_block_id_trace.emplace_back(block_id);
77}
78
79llvm::ArrayRef<lldb::addr_t> HTRInstructionLayer::GetInstructionTrace() const {
80  return m_instruction_trace;
81}
82
83void HTRInstructionLayer::AddCallInstructionMetadata(
84    lldb::addr_t load_addr, std::optional<ConstString> func_name) {
85  m_call_isns.emplace(load_addr, func_name);
86}
87
88void HTRInstructionLayer::AppendInstruction(lldb::addr_t load_addr) {
89  m_instruction_trace.emplace_back(load_addr);
90}
91
92HTRBlock const *HTRBlockLayer::GetBlockById(size_t block_id) const {
93  auto block_it = m_block_defs.find(block_id);
94  if (block_it == m_block_defs.end())
95    return nullptr;
96  else
97    return &block_it->second;
98}
99
100llvm::ArrayRef<size_t> HTRBlockLayer::GetBlockIdTrace() const {
101  return m_block_id_trace;
102}
103
104size_t HTRBlockLayer::GetNumUnits() const { return m_block_id_trace.size(); }
105
106HTRBlockMetadata HTRInstructionLayer::GetMetadataByIndex(size_t index) const {
107  lldb::addr_t instruction_load_address = m_instruction_trace[index];
108  llvm::DenseMap<ConstString, size_t> func_calls;
109
110  auto func_name_it = m_call_isns.find(instruction_load_address);
111  if (func_name_it != m_call_isns.end()) {
112    if (std::optional<ConstString> func_name = func_name_it->second) {
113      func_calls[*func_name] = 1;
114    }
115  }
116  return {instruction_load_address, 1, std::move(func_calls)};
117}
118
119size_t HTRInstructionLayer::GetNumUnits() const {
120  return m_instruction_trace.size();
121}
122
123HTRBlockMetadata HTRBlockLayer::GetMetadataByIndex(size_t index) const {
124  size_t block_id = m_block_id_trace[index];
125  HTRBlock block = m_block_defs.find(block_id)->second;
126  return block.GetMetadata();
127}
128
129TraceHTR::TraceHTR(Thread &thread, TraceCursor &cursor)
130    : m_instruction_layer_up(std::make_unique<HTRInstructionLayer>(0)) {
131
132  // Move cursor to the first instruction in the trace
133  cursor.SetForwards(true);
134  cursor.Seek(0, lldb::eTraceCursorSeekTypeBeginning);
135
136  // TODO: fix after persona0220's patch on a new way to access instruction
137  // kinds
138  /*
139  Target &target = thread.GetProcess()->GetTarget();
140  auto function_name_from_load_address =
141      [&](lldb::addr_t load_address) -> std::optional<ConstString> {
142    lldb_private::Address pc_addr;
143    SymbolContext sc;
144    if (target.ResolveLoadAddress(load_address, pc_addr) &&
145        pc_addr.CalculateSymbolContext(&sc))
146      return sc.GetFunctionName()
147                 ? std::optional<ConstString>(sc.GetFunctionName())
148                 : std::nullopt;
149    else
150      return std::nullopt;
151  };
152
153  while (cursor.HasValue()) { if (cursor.IsError()) {
154      // Append a load address of 0 for all instructions that an error occured
155      // while decoding.
156      // TODO: Make distinction between errors by storing the error messages.
157      // Currently, all errors are treated the same.
158      m_instruction_layer_up->AppendInstruction(0);
159      cursor.Next();
160    } else if (cursor.IsEvent()) {
161      cursor.Next();
162    } else {
163      lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
164      lldb::InstructionControlFlowKind current_instruction_type =
165          cursor.GetInstructionControlFlowKind();
166
167      m_instruction_layer_up->AppendInstruction(
168          current_instruction_load_address);
169      cursor.Next();
170      bool more_data_in_trace = cursor.HasValue();
171      if (current_instruction_type &
172          lldb::eInstructionControlFlowKindCall) {
173        if (more_data_in_trace && !cursor.IsError()) {
174          m_instruction_layer_up->AddCallInstructionMetadata(
175              current_instruction_load_address,
176              function_name_from_load_address(cursor.GetLoadAddress()));
177        } else {
178          // Next instruction is not known - pass None to indicate the name
179          // of the function being called is not known
180          m_instruction_layer_up->AddCallInstructionMetadata(
181              current_instruction_load_address, std::nullopt);
182        }
183      }
184    }
185  }
186  */
187}
188
189void HTRBlockMetadata::MergeMetadata(
190    HTRBlockMetadata &merged_metadata,
191    HTRBlockMetadata const &metadata_to_merge) {
192  merged_metadata.m_num_instructions += metadata_to_merge.m_num_instructions;
193  for (const auto &it : metadata_to_merge.m_func_calls) {
194    ConstString name = it.first;
195    size_t num_calls = it.second;
196    merged_metadata.m_func_calls[name] += num_calls;
197  }
198}
199
200HTRBlock IHTRLayer::MergeUnits(size_t start_unit_index, size_t num_units) {
201  // TODO: make this function take `end_unit_index` as a parameter instead of
202  // unit and merge the range [start_unit_indx, end_unit_index] inclusive.
203  HTRBlockMetadata merged_metadata = GetMetadataByIndex(start_unit_index);
204  for (size_t i = start_unit_index + 1; i < start_unit_index + num_units; i++) {
205    // merge the new metadata into merged_metadata
206    HTRBlockMetadata::MergeMetadata(merged_metadata, GetMetadataByIndex(i));
207  }
208  return {start_unit_index, num_units, merged_metadata};
209}
210
211void TraceHTR::ExecutePasses() {
212  auto are_passes_done = [](IHTRLayer &l1, IHTRLayer &l2) {
213    return l1.GetNumUnits() == l2.GetNumUnits();
214  };
215  HTRBlockLayerUP current_block_layer_up =
216      BasicSuperBlockMerge(*m_instruction_layer_up);
217  HTRBlockLayer &current_block_layer = *current_block_layer_up;
218  if (are_passes_done(*m_instruction_layer_up, *current_block_layer_up))
219    return;
220
221  AddNewBlockLayer(std::move(current_block_layer_up));
222  while (true) {
223    HTRBlockLayerUP new_block_layer_up =
224        BasicSuperBlockMerge(current_block_layer);
225    if (are_passes_done(current_block_layer, *new_block_layer_up))
226      return;
227
228    current_block_layer = *new_block_layer_up;
229    AddNewBlockLayer(std::move(new_block_layer_up));
230  }
231}
232
233llvm::Error TraceHTR::Export(std::string outfile) {
234  std::error_code ec;
235  llvm::raw_fd_ostream os(outfile, ec, llvm::sys::fs::OF_Text);
236  if (ec) {
237    return llvm::make_error<llvm::StringError>(
238        "unable to open destination file: " + outfile, os.error());
239  } else {
240    os << toJSON(*this);
241    os.close();
242    if (os.has_error()) {
243      return llvm::make_error<llvm::StringError>(
244          "unable to write to destination file: " + outfile, os.error());
245    }
246  }
247  return llvm::Error::success();
248}
249
250HTRBlockLayerUP lldb_private::BasicSuperBlockMerge(IHTRLayer &layer) {
251  std::unique_ptr<HTRBlockLayer> new_block_layer =
252      std::make_unique<HTRBlockLayer>(layer.GetLayerId() + 1);
253
254  if (layer.GetNumUnits()) {
255    // Future Improvement: split this into two functions - one for finding heads
256    // and tails, one for merging/creating the next layer A 'head' is defined to
257    // be a block whose occurrences in the trace do not have a unique preceding
258    // block.
259    std::unordered_set<size_t> heads;
260
261    // The load address of the first instruction of a block is the unique ID for
262    // that block (i.e. blocks with the same first instruction load address are
263    // the same block)
264
265    // Future Improvement: no need to store all its preceding block ids, all we
266    // care about is that there is more than one preceding block id, so an enum
267    // could be used
268    std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> head_map;
269    lldb::addr_t prev_id =
270        layer.GetMetadataByIndex(0).GetFirstInstructionLoadAddress();
271    size_t num_units = layer.GetNumUnits();
272    // This excludes the first unit since it has no previous unit
273    for (size_t i = 1; i < num_units; i++) {
274      lldb::addr_t current_id =
275          layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
276      head_map[current_id].insert(prev_id);
277      prev_id = current_id;
278    }
279    for (const auto &it : head_map) {
280      // ID of 0 represents an error - errors can't be heads or tails
281      lldb::addr_t id = it.first;
282      const std::unordered_set<lldb::addr_t> predecessor_set = it.second;
283      if (id && predecessor_set.size() > 1)
284        heads.insert(id);
285    }
286
287    // Future Improvement: identify heads and tails in the same loop
288    // A 'tail' is defined to be a block whose occurrences in the trace do
289    // not have a unique succeeding block.
290    std::unordered_set<lldb::addr_t> tails;
291    std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> tail_map;
292
293    // This excludes the last unit since it has no next unit
294    for (size_t i = 0; i < num_units - 1; i++) {
295      lldb::addr_t current_id =
296          layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
297      lldb::addr_t next_id =
298          layer.GetMetadataByIndex(i + 1).GetFirstInstructionLoadAddress();
299      tail_map[current_id].insert(next_id);
300    }
301
302    // Mark last block as tail so the algorithm stops gracefully
303    lldb::addr_t last_id = layer.GetMetadataByIndex(num_units - 1)
304                               .GetFirstInstructionLoadAddress();
305    tails.insert(last_id);
306    for (const auto &it : tail_map) {
307      lldb::addr_t id = it.first;
308      const std::unordered_set<lldb::addr_t> successor_set = it.second;
309      // ID of 0 represents an error - errors can't be heads or tails
310      if (id && successor_set.size() > 1)
311        tails.insert(id);
312    }
313
314    // Need to keep track of size of string since things we push are variable
315    // length
316    size_t superblock_size = 0;
317    // Each super block always has the same first unit (we call this the
318    // super block head) This gurantee allows us to use the super block head as
319    // the unique key mapping to the super block it begins
320    std::optional<size_t> superblock_head;
321    auto construct_next_layer = [&](size_t merge_start, size_t n) -> void {
322      if (!superblock_head)
323        return;
324      if (new_block_layer->GetBlockById(*superblock_head)) {
325        new_block_layer->AppendRepeatedBlock(*superblock_head);
326      } else {
327        HTRBlock new_block = layer.MergeUnits(merge_start, n);
328        new_block_layer->AppendNewBlock(*superblock_head, std::move(new_block));
329      }
330    };
331
332    for (size_t i = 0; i < num_units; i++) {
333      lldb::addr_t unit_id =
334          layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
335      auto isHead = heads.count(unit_id) > 0;
336      auto isTail = tails.count(unit_id) > 0;
337
338      if (isHead && isTail) {
339        // Head logic
340        if (superblock_size) { // this handles (tail, head) adjacency -
341                               // otherwise an empty
342                               // block is created
343          // End previous super block
344          construct_next_layer(i - superblock_size, superblock_size);
345        }
346        // Current id is first in next super block since it's a head
347        superblock_head = unit_id;
348        superblock_size = 1;
349
350        // Tail logic
351        construct_next_layer(i - superblock_size + 1, superblock_size);
352        // Reset the block_head since the prev super block has come to and end
353        superblock_head = std::nullopt;
354        superblock_size = 0;
355      } else if (isHead) {
356        if (superblock_size) { // this handles (tail, head) adjacency -
357                               // otherwise an empty
358                               // block is created
359          // End previous super block
360          construct_next_layer(i - superblock_size, superblock_size);
361        }
362        // Current id is first in next super block since it's a head
363        superblock_head = unit_id;
364        superblock_size = 1;
365      } else if (isTail) {
366        if (!superblock_head)
367          superblock_head = unit_id;
368        superblock_size++;
369
370        // End previous super block
371        construct_next_layer(i - superblock_size + 1, superblock_size);
372        // Reset the block_head since the prev super block has come to and end
373        superblock_head = std::nullopt;
374        superblock_size = 0;
375      } else {
376        if (!superblock_head)
377          superblock_head = unit_id;
378        superblock_size++;
379      }
380    }
381  }
382  return new_block_layer;
383}
384
385llvm::json::Value lldb_private::toJSON(const TraceHTR &htr) {
386  std::vector<llvm::json::Value> layers_as_json;
387  for (size_t i = 0; i < htr.GetInstructionLayer().GetInstructionTrace().size();
388       i++) {
389    size_t layer_id = htr.GetInstructionLayer().GetLayerId();
390    HTRBlockMetadata metadata = htr.GetInstructionLayer().GetMetadataByIndex(i);
391    lldb::addr_t load_address = metadata.GetFirstInstructionLoadAddress();
392
393    std::string display_name;
394
395    std::stringstream stream;
396    stream << "0x" << std::hex << load_address;
397    std::string load_address_hex_string(stream.str());
398    display_name.assign(load_address_hex_string);
399
400    // name: load address of the first instruction of the block and the name
401    // of the most frequently called function from the block (if applicable)
402
403    // ph: the event type - 'X' for Complete events (see link to documentation
404    // below)
405
406    // Since trace timestamps aren't yet supported in HTR, the ts (timestamp) is
407    // based on the instruction's offset in the trace and the dur (duration) is
408    // 1 since this layer contains single instructions. Using the instruction
409    // offset and a duration of 1 oversimplifies the true timing information of
410    // the trace, nonetheless, these approximate timestamps/durations provide an
411    // clear visualization of the trace.
412
413    // ts: offset from the beginning of the trace for the first instruction in
414    // the block
415
416    // dur: 1 since this layer contains single instructions.
417
418    // pid: the ID of the HTR layer the blocks belong to
419
420    // See
421    // https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.j75x71ritcoy
422    // for documentation on the Trace Event Format
423    layers_as_json.emplace_back(llvm::json::Object{
424        {"name", display_name},
425        {"ph", "X"},
426        {"ts", (int64_t)i},
427        {"dur", 1},
428        {"pid", (int64_t)layer_id},
429    });
430  }
431
432  for (const auto &layer : htr.GetBlockLayers()) {
433    size_t start_ts = 0;
434    std::vector<size_t> block_id_trace = layer->GetBlockIdTrace();
435    for (size_t i = 0; i < block_id_trace.size(); i++) {
436      size_t id = block_id_trace[i];
437      // Guranteed that this ID is valid, so safe to dereference here.
438      HTRBlock block = *layer->GetBlockById(id);
439      llvm::json::Value block_json = toJSON(block);
440      size_t layer_id = layer->GetLayerId();
441
442      HTRBlockMetadata metadata = block.GetMetadata();
443
444      std::optional<llvm::StringRef> most_freq_func =
445          metadata.GetMostFrequentlyCalledFunction();
446      std::stringstream stream;
447      stream << "0x" << std::hex << metadata.GetFirstInstructionLoadAddress();
448      std::string offset_hex_string(stream.str());
449      std::string display_name =
450          most_freq_func ? offset_hex_string + ": " + most_freq_func->str()
451                         : offset_hex_string;
452
453      // Since trace timestamps aren't yet supported in HTR, the ts (timestamp)
454      // and dur (duration) are based on the block's offset in the trace and
455      // number of instructions in the block, respectively. Using the block
456      // offset and the number of instructions oversimplifies the true timing
457      // information of the trace, nonetheless, these approximate
458      // timestamps/durations provide an understandable visualization of the
459      // trace.
460      auto duration = metadata.GetNumInstructions();
461      layers_as_json.emplace_back(llvm::json::Object{
462          {"name", display_name},
463          {"ph", "X"},
464          {"ts", (int64_t)start_ts},
465          {"dur", (int64_t)duration},
466          {"pid", (int64_t)layer_id},
467          {"args", block_json},
468      });
469      start_ts += duration;
470    }
471  }
472  return layers_as_json;
473}
474
475llvm::json::Value lldb_private::toJSON(const HTRBlock &block) {
476  return llvm::json::Value(
477      llvm::json::Object{{"Metadata", block.GetMetadata()}});
478}
479
480llvm::json::Value lldb_private::toJSON(const HTRBlockMetadata &metadata) {
481  std::vector<llvm::json::Value> function_calls;
482  for (const auto &it : metadata.GetFunctionCalls()) {
483    ConstString name = it.first;
484    size_t n_calls = it.second;
485    function_calls.emplace_back(llvm::formatv("({0}: {1})", name, n_calls));
486  }
487
488  return llvm::json::Value(llvm::json::Object{
489      {"Number of Instructions", (ssize_t)metadata.GetNumInstructions()},
490      {"Functions", function_calls}});
491}
492