1//===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef liblldb_Disassembler_h_
10#define liblldb_Disassembler_h_
11
12#include "lldb/Core/Address.h"
13#include "lldb/Core/EmulateInstruction.h"
14#include "lldb/Core/FormatEntity.h"
15#include "lldb/Core/Opcode.h"
16#include "lldb/Core/PluginInterface.h"
17#include "lldb/Interpreter/OptionValue.h"
18#include "lldb/Symbol/LineEntry.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Utility/ArchSpec.h"
21#include "lldb/Utility/ConstString.h"
22#include "lldb/Utility/FileSpec.h"
23#include "lldb/lldb-defines.h"
24#include "lldb/lldb-forward.h"
25#include "lldb/lldb-private-enumerations.h"
26#include "lldb/lldb-types.h"
27
28#include "llvm/ADT/StringRef.h"
29
30#include <functional>
31#include <map>
32#include <memory>
33#include <set>
34#include <string>
35#include <vector>
36
37#include <stddef.h>
38#include <stdint.h>
39#include <stdio.h>
40
// Forward declaration only: this header refers to llvm::SmallVectorImpl
// solely by reference (see Instruction::ParseOperands), so the full
// definition is not required here.
namespace llvm {
template <typename T> class SmallVectorImpl;
} // namespace llvm
44
45namespace lldb_private {
46class AddressRange;
47class DataExtractor;
48class Debugger;
49class Disassembler;
50class Module;
51class Stream;
52class SymbolContext;
53class SymbolContextList;
54class Target;
55struct RegisterInfo;
56
57class Instruction {
58public:
59  Instruction(const Address &address,
60              AddressClass addr_class = AddressClass::eInvalid);
61
62  virtual ~Instruction();
63
64  const Address &GetAddress() const { return m_address; }
65
66  const char *GetMnemonic(const ExecutionContext *exe_ctx) {
67    CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
68    return m_opcode_name.c_str();
69  }
70
71  const char *GetOperands(const ExecutionContext *exe_ctx) {
72    CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
73    return m_mnemonics.c_str();
74  }
75
76  const char *GetComment(const ExecutionContext *exe_ctx) {
77    CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
78    return m_comment.c_str();
79  }
80
81  virtual void
82  CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
83
84  AddressClass GetAddressClass();
85
86  void SetAddress(const Address &addr) {
87    // Invalidate the address class to lazily discover it if we need to.
88    m_address_class = AddressClass::eInvalid;
89    m_address = addr;
90  }
91
92  /// Dump the text representation of this Instruction to a Stream
93  ///
94  /// Print the (optional) address, (optional) bytes, opcode,
95  /// operands, and instruction comments to a stream.
96  ///
97  /// \param[in] s
98  ///     The Stream to add the text to.
99  ///
100  /// \param[in] show_address
101  ///     Whether the address (using disassembly_addr_format_spec formatting)
102  ///     should be printed.
103  ///
104  /// \param[in] show_bytes
105  ///     Whether the bytes of the assembly instruction should be printed.
106  ///
107  /// \param[in] max_opcode_byte_size
108  ///     The size (in bytes) of the largest instruction in the list that
109  ///     we are printing (for text justification/alignment purposes)
110  ///     Only needed if show_bytes is true.
111  ///
112  /// \param[in] exe_ctx
113  ///     The current execution context, if available.  May be used in
114  ///     the assembling of the operands+comments for this instruction.
115  ///     Pass NULL if not applicable.
116  ///
117  /// \param[in] sym_ctx
118  ///     The SymbolContext for this instruction.
119  ///     Pass NULL if not available/computed.
120  ///     Only needed if show_address is true.
121  ///
122  /// \param[in] prev_sym_ctx
123  ///     The SymbolContext for the previous instruction.  Depending on
124  ///     the disassembly address format specification, a change in
125  ///     Symbol / Function may mean that a line is printed with the new
126  ///     symbol/function name.
127  ///     Pass NULL if unavailable, or if this is the first instruction of
128  ///     the InstructionList.
129  ///     Only needed if show_address is true.
130  ///
131  /// \param[in] disassembly_addr_format
132  ///     The format specification for how addresses are printed.
133  ///     Only needed if show_address is true.
134  ///
135  /// \param[in] max_address_text_size
136  ///     The length of the longest address string at the start of the
137  ///     disassembly line that will be printed (the
138  ///     Debugger::FormatDisassemblerAddress() string)
139  ///     so this method can properly align the instruction opcodes.
140  ///     May be 0 to indicate no indentation/alignment of the opcodes.
141  virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
142                    bool show_bytes, const ExecutionContext *exe_ctx,
143                    const SymbolContext *sym_ctx,
144                    const SymbolContext *prev_sym_ctx,
145                    const FormatEntity::Entry *disassembly_addr_format,
146                    size_t max_address_text_size);
147
148  virtual bool DoesBranch() = 0;
149
150  virtual bool HasDelaySlot();
151
152  bool CanSetBreakpoint ();
153
154  virtual size_t Decode(const Disassembler &disassembler,
155                        const DataExtractor &data,
156                        lldb::offset_t data_offset) = 0;
157
158  virtual void SetDescription(llvm::StringRef) {
159  } // May be overridden in sub-classes that have descriptions.
160
161  lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
162                                OptionValue::Type data_type);
163
164  lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
165
166  bool DumpEmulation(const ArchSpec &arch);
167
168  virtual bool TestEmulation(Stream *stream, const char *test_file_name);
169
170  bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
171               EmulateInstruction::ReadMemoryCallback read_mem_callback,
172               EmulateInstruction::WriteMemoryCallback write_mem_calback,
173               EmulateInstruction::ReadRegisterCallback read_reg_callback,
174               EmulateInstruction::WriteRegisterCallback write_reg_callback);
175
176  const Opcode &GetOpcode() const { return m_opcode; }
177
178  uint32_t GetData(DataExtractor &data);
179
180  struct Operand {
181    enum class Type {
182      Invalid = 0,
183      Register,
184      Immediate,
185      Dereference,
186      Sum,
187      Product
188    } m_type = Type::Invalid;
189    std::vector<Operand> m_children;
190    lldb::addr_t m_immediate = 0;
191    ConstString m_register;
192    bool m_negative = false;
193    bool m_clobbered = false;
194
195    bool IsValid() { return m_type != Type::Invalid; }
196
197    static Operand BuildRegister(ConstString &r);
198    static Operand BuildImmediate(lldb::addr_t imm, bool neg);
199    static Operand BuildImmediate(int64_t imm);
200    static Operand BuildDereference(const Operand &ref);
201    static Operand BuildSum(const Operand &lhs, const Operand &rhs);
202    static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
203  };
204
205  virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
206    return false;
207  }
208
209  virtual bool IsCall() { return false; }
210
211protected:
212  Address m_address; // The section offset address of this instruction
213                     // We include an address class in the Instruction class to
214                     // allow the instruction specify the
215                     // AddressClass::eCodeAlternateISA (currently used for
216                     // thumb), and also to specify data (AddressClass::eData).
217                     // The usual value will be AddressClass::eCode, but often
218                     // when disassembling memory, you might run into data.
219                     // This can help us to disassemble appropriately.
220private:
221  AddressClass m_address_class; // Use GetAddressClass () accessor function!
222
223protected:
224  Opcode m_opcode; // The opcode for this instruction
225  std::string m_opcode_name;
226  std::string m_mnemonics;
227  std::string m_comment;
228  bool m_calculated_strings;
229
230  void
231  CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
232    if (!m_calculated_strings) {
233      m_calculated_strings = true;
234      CalculateMnemonicOperandsAndComment(exe_ctx);
235    }
236  }
237};
238
239namespace OperandMatchers {
240std::function<bool(const Instruction::Operand &)>
241MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
242              std::function<bool(const Instruction::Operand &)> left,
243              std::function<bool(const Instruction::Operand &)> right);
244
245std::function<bool(const Instruction::Operand &)>
246MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
247             std::function<bool(const Instruction::Operand &)> child);
248
249std::function<bool(const Instruction::Operand &)>
250MatchRegOp(const RegisterInfo &info);
251
252std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
253
254std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
255
256std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
257
258std::function<bool(const Instruction::Operand &)>
259MatchOpType(Instruction::Operand::Type type);
260}
261
262class InstructionList {
263public:
264  InstructionList();
265  ~InstructionList();
266
267  size_t GetSize() const;
268
269  uint32_t GetMaxOpcocdeByteSize() const;
270
271  lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
272
273  //------------------------------------------------------------------
274  /// Get the index of the next branch instruction.
275  ///
276  /// Given a list of instructions, find the next branch instruction
277  /// in the list by returning an index.
278  ///
279  /// @param[in] start
280  ///     The instruction index of the first instruction to check.
281  ///
282  /// @param[in] target
283  ///     A LLDB target object that is used to resolve addresses.
284  ///
285  /// @param[in] ignore_calls
286  ///     It true, then fine the first branch instruction that isn't
287  ///     a function call (a branch that calls and returns to the next
288  ///     instruction). If false, find the instruction index of any
289  ///     branch in the list.
290  ///
291  /// @param[out] found_calls
292  ///     If non-null, this will be set to true if any calls were found in
293  ///     extending the range.
294  ///
295  /// @return
296  ///     The instruction index of the first branch that is at or past
297  ///     \a start. Returns UINT32_MAX if no matching branches are
298  ///     found.
299  //------------------------------------------------------------------
300  uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
301                                           Target &target,
302                                           bool ignore_calls,
303                                           bool *found_calls) const;
304
305  uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
306                                              Target &target);
307
308  uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
309
310  void Clear();
311
312  void Append(lldb::InstructionSP &inst_sp);
313
314  void Dump(Stream *s, bool show_address, bool show_bytes,
315            const ExecutionContext *exe_ctx);
316
317private:
318  typedef std::vector<lldb::InstructionSP> collection;
319  typedef collection::iterator iterator;
320  typedef collection::const_iterator const_iterator;
321
322  collection m_instructions;
323};
324
325class PseudoInstruction : public Instruction {
326public:
327  PseudoInstruction();
328
329  ~PseudoInstruction() override;
330
331  bool DoesBranch() override;
332
333  bool HasDelaySlot() override;
334
335  void CalculateMnemonicOperandsAndComment(
336      const ExecutionContext *exe_ctx) override {
337    // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
338    // mnemonic into Instruction::m_mnemonics, and any comment into
339    // Instruction::m_comment
340  }
341
342  size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
343                lldb::offset_t data_offset) override;
344
345  void SetOpcode(size_t opcode_size, void *opcode_data);
346
347  void SetDescription(llvm::StringRef description) override;
348
349protected:
350  std::string m_description;
351
352  DISALLOW_COPY_AND_ASSIGN(PseudoInstruction);
353};
354
355class Disassembler : public std::enable_shared_from_this<Disassembler>,
356                     public PluginInterface {
357public:
358  enum {
359    eOptionNone = 0u,
360    eOptionShowBytes = (1u << 0),
361    eOptionRawOuput = (1u << 1),
362    eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
363                                         // the current PC (mixed mode only)
364    eOptionMarkPCAddress =
365        (1u << 3) // Mark the disassembly line the contains the PC
366  };
367
368  enum HexImmediateStyle {
369    eHexStyleC,
370    eHexStyleAsm,
371  };
372
373  // FindPlugin should be lax about the flavor string (it is too annoying to
374  // have various internal uses of the disassembler fail because the global
375  // flavor string gets set wrong. Instead, if you get a flavor string you
376  // don't understand, use the default.  Folks who care to check can use the
377  // FlavorValidForArchSpec method on the disassembler they got back.
378  static lldb::DisassemblerSP
379  FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
380
381  // This version will use the value in the Target settings if flavor is NULL;
382  static lldb::DisassemblerSP
383  FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch,
384                      const char *flavor, const char *plugin_name);
385
386  static lldb::DisassemblerSP
387  DisassembleRange(const ArchSpec &arch, const char *plugin_name,
388                   const char *flavor, const ExecutionContext &exe_ctx,
389                   const AddressRange &disasm_range, bool prefer_file_cache);
390
391  static lldb::DisassemblerSP
392  DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
393                   const char *flavor, const Address &start, const void *bytes,
394                   size_t length, uint32_t max_num_instructions,
395                   bool data_from_file);
396
397  static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
398                          const char *plugin_name, const char *flavor,
399                          const ExecutionContext &exe_ctx,
400                          const AddressRange &range, uint32_t num_instructions,
401                          bool mixed_source_and_assembly,
402                          uint32_t num_mixed_context_lines, uint32_t options,
403                          Stream &strm);
404
405  static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
406                          const char *plugin_name, const char *flavor,
407                          const ExecutionContext &exe_ctx, const Address &start,
408                          uint32_t num_instructions,
409                          bool mixed_source_and_assembly,
410                          uint32_t num_mixed_context_lines, uint32_t options,
411                          Stream &strm);
412
413  static size_t
414  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
415              const char *flavor, const ExecutionContext &exe_ctx,
416              SymbolContextList &sc_list, uint32_t num_instructions,
417              bool mixed_source_and_assembly, uint32_t num_mixed_context_lines,
418              uint32_t options, Stream &strm);
419
420  static bool
421  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
422              const char *flavor, const ExecutionContext &exe_ctx,
423              ConstString name, Module *module,
424              uint32_t num_instructions, bool mixed_source_and_assembly,
425              uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
426
427  static bool
428  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
429              const char *flavor, const ExecutionContext &exe_ctx,
430              uint32_t num_instructions, bool mixed_source_and_assembly,
431              uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
432
433  // Constructors and Destructors
434  Disassembler(const ArchSpec &arch, const char *flavor);
435  ~Disassembler() override;
436
437  typedef const char *(*SummaryCallback)(const Instruction &inst,
438                                         ExecutionContext *exe_context,
439                                         void *user_data);
440
441  static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger,
442                                const ArchSpec &arch,
443                                const ExecutionContext &exe_ctx,
444                                uint32_t num_instructions,
445                                bool mixed_source_and_assembly,
446                                uint32_t num_mixed_context_lines,
447                                uint32_t options, Stream &strm);
448
449  size_t ParseInstructions(const ExecutionContext *exe_ctx,
450                           const AddressRange &range, Stream *error_strm_ptr,
451                           bool prefer_file_cache);
452
453  size_t ParseInstructions(const ExecutionContext *exe_ctx,
454                           const Address &range, uint32_t num_instructions,
455                           bool prefer_file_cache);
456
457  virtual size_t DecodeInstructions(const Address &base_addr,
458                                    const DataExtractor &data,
459                                    lldb::offset_t data_offset,
460                                    size_t num_instructions, bool append,
461                                    bool data_from_file) = 0;
462
463  InstructionList &GetInstructionList();
464
465  const InstructionList &GetInstructionList() const;
466
467  const ArchSpec &GetArchitecture() const { return m_arch; }
468
469  const char *GetFlavor() const { return m_flavor.c_str(); }
470
471  virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
472                                      const char *flavor) = 0;
473
474protected:
475  // SourceLine and SourceLinesToDisplay structures are only used in the mixed
476  // source and assembly display methods internal to this class.
477
478  struct SourceLine {
479    FileSpec file;
480    uint32_t line;
481    uint32_t column;
482
483    SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
484
485    bool operator==(const SourceLine &rhs) const {
486      return file == rhs.file && line == rhs.line && rhs.column == column;
487    }
488
489    bool operator!=(const SourceLine &rhs) const {
490      return file != rhs.file || line != rhs.line || column != rhs.column;
491    }
492
493    bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
494  };
495
496  struct SourceLinesToDisplay {
497    std::vector<SourceLine> lines;
498
499    // index of the "current" source line, if we want to highlight that when
500    // displaying the source lines.  (as opposed to the surrounding source
501    // lines provided to give context)
502    size_t current_source_line;
503
504    // Whether to print a blank line at the end of the source lines.
505    bool print_source_context_end_eol;
506
507    SourceLinesToDisplay()
508        : lines(), current_source_line(-1), print_source_context_end_eol(true) {
509    }
510  };
511
512  // Get the function's declaration line number, hopefully a line number
513  // earlier than the opening curly brace at the start of the function body.
514  static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
515
516  // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
517  static void AddLineToSourceLineTables(
518      SourceLine &line,
519      std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
520
521  // Given a source line, determine if we should print it when we're doing
522  // mixed source & assembly output. We're currently using the
523  // target.process.thread.step-avoid-regexp setting (which is used for
524  // stepping over inlined STL functions by default) to determine what source
525  // lines to avoid showing.
526  //
527  // Returns true if this source line should be elided (if the source line
528  // should not be displayed).
529  static bool
530  ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
531                                     const SymbolContext &sc, SourceLine &line);
532
533  static bool
534  ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
535                                     const SymbolContext &sc, LineEntry &line) {
536    SourceLine sl;
537    sl.file = line.file;
538    sl.line = line.line;
539    sl.column = line.column;
540    return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
541  };
542
543  // Classes that inherit from Disassembler can see and modify these
544  ArchSpec m_arch;
545  InstructionList m_instruction_list;
546  lldb::addr_t m_base_addr;
547  std::string m_flavor;
548
549private:
550  // For Disassembler only
551  DISALLOW_COPY_AND_ASSIGN(Disassembler);
552};
553
554} // namespace lldb_private
555
556#endif // liblldb_Disassembler_h_
557