1//===-- Disassembler.h ------------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef liblldb_Disassembler_h_ 10#define liblldb_Disassembler_h_ 11 12#include "lldb/Core/Address.h" 13#include "lldb/Core/EmulateInstruction.h" 14#include "lldb/Core/FormatEntity.h" 15#include "lldb/Core/Opcode.h" 16#include "lldb/Core/PluginInterface.h" 17#include "lldb/Interpreter/OptionValue.h" 18#include "lldb/Symbol/LineEntry.h" 19#include "lldb/Target/ExecutionContext.h" 20#include "lldb/Utility/ArchSpec.h" 21#include "lldb/Utility/ConstString.h" 22#include "lldb/Utility/FileSpec.h" 23#include "lldb/lldb-defines.h" 24#include "lldb/lldb-forward.h" 25#include "lldb/lldb-private-enumerations.h" 26#include "lldb/lldb-types.h" 27 28#include "llvm/ADT/StringRef.h" 29 30#include <functional> 31#include <map> 32#include <memory> 33#include <set> 34#include <string> 35#include <vector> 36 37#include <stddef.h> 38#include <stdint.h> 39#include <stdio.h> 40 41namespace llvm { 42template <typename T> class SmallVectorImpl; 43} 44 45namespace lldb_private { 46class AddressRange; 47class DataExtractor; 48class Debugger; 49class Disassembler; 50class Module; 51class Stream; 52class SymbolContext; 53class SymbolContextList; 54class Target; 55struct RegisterInfo; 56 57class Instruction { 58public: 59 Instruction(const Address &address, 60 AddressClass addr_class = AddressClass::eInvalid); 61 62 virtual ~Instruction(); 63 64 const Address &GetAddress() const { return m_address; } 65 66 const char *GetMnemonic(const ExecutionContext *exe_ctx) { 67 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 68 return m_opcode_name.c_str(); 69 } 70 71 const char *GetOperands(const ExecutionContext *exe_ctx) { 72 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 73 return m_mnemonics.c_str(); 74 } 75 76 const char *GetComment(const ExecutionContext *exe_ctx) { 77 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 78 return m_comment.c_str(); 79 } 80 81 virtual void 82 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; 83 84 AddressClass GetAddressClass(); 85 86 void SetAddress(const Address &addr) { 87 // Invalidate the address class to lazily discover it if we need to. 88 m_address_class = AddressClass::eInvalid; 89 m_address = addr; 90 } 91 92 /// Dump the text representation of this Instruction to a Stream 93 /// 94 /// Print the (optional) address, (optional) bytes, opcode, 95 /// operands, and instruction comments to a stream. 96 /// 97 /// \param[in] s 98 /// The Stream to add the text to. 99 /// 100 /// \param[in] show_address 101 /// Whether the address (using disassembly_addr_format_spec formatting) 102 /// should be printed. 103 /// 104 /// \param[in] show_bytes 105 /// Whether the bytes of the assembly instruction should be printed. 106 /// 107 /// \param[in] max_opcode_byte_size 108 /// The size (in bytes) of the largest instruction in the list that 109 /// we are printing (for text justification/alignment purposes) 110 /// Only needed if show_bytes is true. 111 /// 112 /// \param[in] exe_ctx 113 /// The current execution context, if available. May be used in 114 /// the assembling of the operands+comments for this instruction. 115 /// Pass NULL if not applicable. 116 /// 117 /// \param[in] sym_ctx 118 /// The SymbolContext for this instruction. 119 /// Pass NULL if not available/computed. 120 /// Only needed if show_address is true. 121 /// 122 /// \param[in] prev_sym_ctx 123 /// The SymbolContext for the previous instruction. Depending on 124 /// the disassembly address format specification, a change in 125 /// Symbol / Function may mean that a line is printed with the new 126 /// symbol/function name. 127 /// Pass NULL if unavailable, or if this is the first instruction of 128 /// the InstructionList. 129 /// Only needed if show_address is true. 130 /// 131 /// \param[in] disassembly_addr_format 132 /// The format specification for how addresses are printed. 133 /// Only needed if show_address is true. 134 /// 135 /// \param[in] max_address_text_size 136 /// The length of the longest address string at the start of the 137 /// disassembly line that will be printed (the 138 /// Debugger::FormatDisassemblerAddress() string) 139 /// so this method can properly align the instruction opcodes. 140 /// May be 0 to indicate no indentation/alignment of the opcodes. 141 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, 142 bool show_bytes, const ExecutionContext *exe_ctx, 143 const SymbolContext *sym_ctx, 144 const SymbolContext *prev_sym_ctx, 145 const FormatEntity::Entry *disassembly_addr_format, 146 size_t max_address_text_size); 147 148 virtual bool DoesBranch() = 0; 149 150 virtual bool HasDelaySlot(); 151 152 bool CanSetBreakpoint (); 153 154 virtual size_t Decode(const Disassembler &disassembler, 155 const DataExtractor &data, 156 lldb::offset_t data_offset) = 0; 157 158 virtual void SetDescription(llvm::StringRef) { 159 } // May be overridden in sub-classes that have descriptions. 160 161 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, 162 OptionValue::Type data_type); 163 164 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); 165 166 bool DumpEmulation(const ArchSpec &arch); 167 168 virtual bool TestEmulation(Stream *stream, const char *test_file_name); 169 170 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, 171 EmulateInstruction::ReadMemoryCallback read_mem_callback, 172 EmulateInstruction::WriteMemoryCallback write_mem_calback, 173 EmulateInstruction::ReadRegisterCallback read_reg_callback, 174 EmulateInstruction::WriteRegisterCallback write_reg_callback); 175 176 const Opcode &GetOpcode() const { return m_opcode; } 177 178 uint32_t GetData(DataExtractor &data); 179 180 struct Operand { 181 enum class Type { 182 Invalid = 0, 183 Register, 184 Immediate, 185 Dereference, 186 Sum, 187 Product 188 } m_type = Type::Invalid; 189 std::vector<Operand> m_children; 190 lldb::addr_t m_immediate = 0; 191 ConstString m_register; 192 bool m_negative = false; 193 bool m_clobbered = false; 194 195 bool IsValid() { return m_type != Type::Invalid; } 196 197 static Operand BuildRegister(ConstString &r); 198 static Operand BuildImmediate(lldb::addr_t imm, bool neg); 199 static Operand BuildImmediate(int64_t imm); 200 static Operand BuildDereference(const Operand &ref); 201 static Operand BuildSum(const Operand &lhs, const Operand &rhs); 202 static Operand BuildProduct(const Operand &lhs, const Operand &rhs); 203 }; 204 205 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { 206 return false; 207 } 208 209 virtual bool IsCall() { return false; } 210 211protected: 212 Address m_address; // The section offset address of this instruction 213 // We include an address class in the Instruction class to 214 // allow the instruction specify the 215 // AddressClass::eCodeAlternateISA (currently used for 216 // thumb), and also to specify data (AddressClass::eData). 217 // The usual value will be AddressClass::eCode, but often 218 // when disassembling memory, you might run into data. 219 // This can help us to disassemble appropriately. 220private: 221 AddressClass m_address_class; // Use GetAddressClass () accessor function! 222 223protected: 224 Opcode m_opcode; // The opcode for this instruction 225 std::string m_opcode_name; 226 std::string m_mnemonics; 227 std::string m_comment; 228 bool m_calculated_strings; 229 230 void 231 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { 232 if (!m_calculated_strings) { 233 m_calculated_strings = true; 234 CalculateMnemonicOperandsAndComment(exe_ctx); 235 } 236 } 237}; 238 239namespace OperandMatchers { 240std::function<bool(const Instruction::Operand &)> 241MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, 242 std::function<bool(const Instruction::Operand &)> left, 243 std::function<bool(const Instruction::Operand &)> right); 244 245std::function<bool(const Instruction::Operand &)> 246MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, 247 std::function<bool(const Instruction::Operand &)> child); 248 249std::function<bool(const Instruction::Operand &)> 250MatchRegOp(const RegisterInfo &info); 251 252std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); 253 254std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); 255 256std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); 257 258std::function<bool(const Instruction::Operand &)> 259MatchOpType(Instruction::Operand::Type type); 260} 261 262class InstructionList { 263public: 264 InstructionList(); 265 ~InstructionList(); 266 267 size_t GetSize() const; 268 269 uint32_t GetMaxOpcocdeByteSize() const; 270 271 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; 272 273 //------------------------------------------------------------------ 274 /// Get the index of the next branch instruction. 275 /// 276 /// Given a list of instructions, find the next branch instruction 277 /// in the list by returning an index. 278 /// 279 /// @param[in] start 280 /// The instruction index of the first instruction to check. 281 /// 282 /// @param[in] target 283 /// A LLDB target object that is used to resolve addresses. 284 /// 285 /// @param[in] ignore_calls 286 /// It true, then fine the first branch instruction that isn't 287 /// a function call (a branch that calls and returns to the next 288 /// instruction). If false, find the instruction index of any 289 /// branch in the list. 290 /// 291 /// @param[out] found_calls 292 /// If non-null, this will be set to true if any calls were found in 293 /// extending the range. 294 /// 295 /// @return 296 /// The instruction index of the first branch that is at or past 297 /// \a start. Returns UINT32_MAX if no matching branches are 298 /// found. 299 //------------------------------------------------------------------ 300 uint32_t GetIndexOfNextBranchInstruction(uint32_t start, 301 Target &target, 302 bool ignore_calls, 303 bool *found_calls) const; 304 305 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 306 Target &target); 307 308 uint32_t GetIndexOfInstructionAtAddress(const Address &addr); 309 310 void Clear(); 311 312 void Append(lldb::InstructionSP &inst_sp); 313 314 void Dump(Stream *s, bool show_address, bool show_bytes, 315 const ExecutionContext *exe_ctx); 316 317private: 318 typedef std::vector<lldb::InstructionSP> collection; 319 typedef collection::iterator iterator; 320 typedef collection::const_iterator const_iterator; 321 322 collection m_instructions; 323}; 324 325class PseudoInstruction : public Instruction { 326public: 327 PseudoInstruction(); 328 329 ~PseudoInstruction() override; 330 331 bool DoesBranch() override; 332 333 bool HasDelaySlot() override; 334 335 void CalculateMnemonicOperandsAndComment( 336 const ExecutionContext *exe_ctx) override { 337 // TODO: fill this in and put opcode name into Instruction::m_opcode_name, 338 // mnemonic into Instruction::m_mnemonics, and any comment into 339 // Instruction::m_comment 340 } 341 342 size_t Decode(const Disassembler &disassembler, const DataExtractor &data, 343 lldb::offset_t data_offset) override; 344 345 void SetOpcode(size_t opcode_size, void *opcode_data); 346 347 void SetDescription(llvm::StringRef description) override; 348 349protected: 350 std::string m_description; 351 352 DISALLOW_COPY_AND_ASSIGN(PseudoInstruction); 353}; 354 355class Disassembler : public std::enable_shared_from_this<Disassembler>, 356 public PluginInterface { 357public: 358 enum { 359 eOptionNone = 0u, 360 eOptionShowBytes = (1u << 0), 361 eOptionRawOuput = (1u << 1), 362 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains 363 // the current PC (mixed mode only) 364 eOptionMarkPCAddress = 365 (1u << 3) // Mark the disassembly line the contains the PC 366 }; 367 368 enum HexImmediateStyle { 369 eHexStyleC, 370 eHexStyleAsm, 371 }; 372 373 // FindPlugin should be lax about the flavor string (it is too annoying to 374 // have various internal uses of the disassembler fail because the global 375 // flavor string gets set wrong. Instead, if you get a flavor string you 376 // don't understand, use the default. Folks who care to check can use the 377 // FlavorValidForArchSpec method on the disassembler they got back. 378 static lldb::DisassemblerSP 379 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); 380 381 // This version will use the value in the Target settings if flavor is NULL; 382 static lldb::DisassemblerSP 383 FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch, 384 const char *flavor, const char *plugin_name); 385 386 static lldb::DisassemblerSP 387 DisassembleRange(const ArchSpec &arch, const char *plugin_name, 388 const char *flavor, const ExecutionContext &exe_ctx, 389 const AddressRange &disasm_range, bool prefer_file_cache); 390 391 static lldb::DisassemblerSP 392 DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 393 const char *flavor, const Address &start, const void *bytes, 394 size_t length, uint32_t max_num_instructions, 395 bool data_from_file); 396 397 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 398 const char *plugin_name, const char *flavor, 399 const ExecutionContext &exe_ctx, 400 const AddressRange &range, uint32_t num_instructions, 401 bool mixed_source_and_assembly, 402 uint32_t num_mixed_context_lines, uint32_t options, 403 Stream &strm); 404 405 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 406 const char *plugin_name, const char *flavor, 407 const ExecutionContext &exe_ctx, const Address &start, 408 uint32_t num_instructions, 409 bool mixed_source_and_assembly, 410 uint32_t num_mixed_context_lines, uint32_t options, 411 Stream &strm); 412 413 static size_t 414 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 415 const char *flavor, const ExecutionContext &exe_ctx, 416 SymbolContextList &sc_list, uint32_t num_instructions, 417 bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, 418 uint32_t options, Stream &strm); 419 420 static bool 421 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 422 const char *flavor, const ExecutionContext &exe_ctx, 423 ConstString name, Module *module, 424 uint32_t num_instructions, bool mixed_source_and_assembly, 425 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); 426 427 static bool 428 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 429 const char *flavor, const ExecutionContext &exe_ctx, 430 uint32_t num_instructions, bool mixed_source_and_assembly, 431 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); 432 433 // Constructors and Destructors 434 Disassembler(const ArchSpec &arch, const char *flavor); 435 ~Disassembler() override; 436 437 typedef const char *(*SummaryCallback)(const Instruction &inst, 438 ExecutionContext *exe_context, 439 void *user_data); 440 441 static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger, 442 const ArchSpec &arch, 443 const ExecutionContext &exe_ctx, 444 uint32_t num_instructions, 445 bool mixed_source_and_assembly, 446 uint32_t num_mixed_context_lines, 447 uint32_t options, Stream &strm); 448 449 size_t ParseInstructions(const ExecutionContext *exe_ctx, 450 const AddressRange &range, Stream *error_strm_ptr, 451 bool prefer_file_cache); 452 453 size_t ParseInstructions(const ExecutionContext *exe_ctx, 454 const Address &range, uint32_t num_instructions, 455 bool prefer_file_cache); 456 457 virtual size_t DecodeInstructions(const Address &base_addr, 458 const DataExtractor &data, 459 lldb::offset_t data_offset, 460 size_t num_instructions, bool append, 461 bool data_from_file) = 0; 462 463 InstructionList &GetInstructionList(); 464 465 const InstructionList &GetInstructionList() const; 466 467 const ArchSpec &GetArchitecture() const { return m_arch; } 468 469 const char *GetFlavor() const { return m_flavor.c_str(); } 470 471 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, 472 const char *flavor) = 0; 473 474protected: 475 // SourceLine and SourceLinesToDisplay structures are only used in the mixed 476 // source and assembly display methods internal to this class. 477 478 struct SourceLine { 479 FileSpec file; 480 uint32_t line; 481 uint32_t column; 482 483 SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {} 484 485 bool operator==(const SourceLine &rhs) const { 486 return file == rhs.file && line == rhs.line && rhs.column == column; 487 } 488 489 bool operator!=(const SourceLine &rhs) const { 490 return file != rhs.file || line != rhs.line || column != rhs.column; 491 } 492 493 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } 494 }; 495 496 struct SourceLinesToDisplay { 497 std::vector<SourceLine> lines; 498 499 // index of the "current" source line, if we want to highlight that when 500 // displaying the source lines. (as opposed to the surrounding source 501 // lines provided to give context) 502 size_t current_source_line; 503 504 // Whether to print a blank line at the end of the source lines. 505 bool print_source_context_end_eol; 506 507 SourceLinesToDisplay() 508 : lines(), current_source_line(-1), print_source_context_end_eol(true) { 509 } 510 }; 511 512 // Get the function's declaration line number, hopefully a line number 513 // earlier than the opening curly brace at the start of the function body. 514 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); 515 516 // Add the provided SourceLine to the map of filenames-to-source-lines-seen. 517 static void AddLineToSourceLineTables( 518 SourceLine &line, 519 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); 520 521 // Given a source line, determine if we should print it when we're doing 522 // mixed source & assembly output. We're currently using the 523 // target.process.thread.step-avoid-regexp setting (which is used for 524 // stepping over inlined STL functions by default) to determine what source 525 // lines to avoid showing. 526 // 527 // Returns true if this source line should be elided (if the source line 528 // should not be displayed). 529 static bool 530 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 531 const SymbolContext &sc, SourceLine &line); 532 533 static bool 534 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 535 const SymbolContext &sc, LineEntry &line) { 536 SourceLine sl; 537 sl.file = line.file; 538 sl.line = line.line; 539 sl.column = line.column; 540 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); 541 }; 542 543 // Classes that inherit from Disassembler can see and modify these 544 ArchSpec m_arch; 545 InstructionList m_instruction_list; 546 lldb::addr_t m_base_addr; 547 std::string m_flavor; 548 549private: 550 // For Disassembler only 551 DISALLOW_COPY_AND_ASSIGN(Disassembler); 552}; 553 554} // namespace lldb_private 555 556#endif // liblldb_Disassembler_h_ 557