1//===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
10#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/BinaryFormat/Dwarf.h"
14#include "llvm/DebugInfo/DIContext.h"
15#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
16#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
17#include "llvm/Support/MD5.h"
18#include "llvm/Support/Path.h"
19#include <cstdint>
20#include <map>
21#include <string>
22#include <vector>
23
24namespace llvm {
25
26class raw_ostream;
27
28class DWARFDebugLine {
29public:
  /// One entry of a line table prologue's file name table (the element type
  /// of Prologue::FileNames).
  struct FileNameEntry {
    FileNameEntry() = default;

    /// The file name, kept as a form value.
    DWARFFormValue Name;
    /// Directory index of this file.
    /// NOTE(review): presumably an index into Prologue::IncludeDirectories
    /// (see LineTable::getDirectoryForEntry); confirm against the definition.
    uint64_t DirIdx = 0;
    /// Modification timestamp; see ContentTypeTracker::HasModTime.
    uint64_t ModTime = 0;
    /// File size; see ContentTypeTracker::HasLength.
    uint64_t Length = 0;
    /// MD5 checksum of the file; see ContentTypeTracker::HasMD5.
    MD5::MD5Result Checksum;
    /// Source text, kept as a form value; see ContentTypeTracker::HasSource.
    DWARFFormValue Source;
  };
40
  /// Tracks which optional content types are present in a DWARF file name
  /// entry format. Every flag starts out false.
  struct ContentTypeTracker {
    ContentTypeTracker() = default;

    /// Whether filename entries provide a modification timestamp.
    bool HasModTime = false;
    /// Whether filename entries provide a file size.
    bool HasLength = false;
    /// For v5, whether filename entries provide an MD5 checksum.
    bool HasMD5 = false;
    /// For v5, whether filename entries provide source text.
    bool HasSource = false;

    /// Update tracked content types with \p ContentType.
    /// NOTE(review): presumably sets whichever flag above corresponds to
    /// \p ContentType; confirm against the definition.
    void trackContentType(dwarf::LineNumberEntryFormat ContentType);
  };
58
59  struct Prologue {
60    Prologue();
61
62    /// The size in bytes of the statement information for this compilation unit
63    /// (not including the total_length field itself).
64    uint64_t TotalLength;
65    /// Version, address size (starting in v5), and DWARF32/64 format; these
66    /// parameters affect interpretation of forms (used in the directory and
67    /// file tables starting with v5).
68    dwarf::FormParams FormParams;
69    /// The number of bytes following the prologue_length field to the beginning
70    /// of the first byte of the statement program itself.
71    uint64_t PrologueLength;
72    /// In v5, size in bytes of a segment selector.
73    uint8_t SegSelectorSize;
74    /// The size in bytes of the smallest target machine instruction. Statement
75    /// program opcodes that alter the address register first multiply their
76    /// operands by this value.
77    uint8_t MinInstLength;
78    /// The maximum number of individual operations that may be encoded in an
79    /// instruction.
80    uint8_t MaxOpsPerInst;
81    /// The initial value of theis_stmtregister.
82    uint8_t DefaultIsStmt;
83    /// This parameter affects the meaning of the special opcodes. See below.
84    int8_t LineBase;
85    /// This parameter affects the meaning of the special opcodes. See below.
86    uint8_t LineRange;
87    /// The number assigned to the first special opcode.
88    uint8_t OpcodeBase;
89    /// This tracks which optional file format content types are present.
90    ContentTypeTracker ContentTypes;
91    std::vector<uint8_t> StandardOpcodeLengths;
92    std::vector<DWARFFormValue> IncludeDirectories;
93    std::vector<FileNameEntry> FileNames;
94
95    const dwarf::FormParams getFormParams() const { return FormParams; }
96    uint16_t getVersion() const { return FormParams.Version; }
97    uint8_t getAddressSize() const { return FormParams.AddrSize; }
98    bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; }
99
100    uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; }
101
102    uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; }
103
104    bool totalLengthIsValid() const;
105
106    /// Length of the prologue in bytes.
107    uint64_t getLength() const;
108
109    /// Get DWARF-version aware access to the file name entry at the provided
110    /// index.
111    const llvm::DWARFDebugLine::FileNameEntry &
112    getFileNameEntry(uint64_t Index) const;
113
114    bool hasFileAtIndex(uint64_t FileIndex) const;
115
116    std::optional<uint64_t> getLastValidFileIndex() const;
117
118    bool
119    getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
120                       DILineInfoSpecifier::FileLineInfoKind Kind,
121                       std::string &Result,
122                       sys::path::Style Style = sys::path::Style::native) const;
123
124    void clear();
125    void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
126    Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr,
127                function_ref<void(Error)> RecoverableErrorHandler,
128                const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
129  };
130
131  /// Standard .debug_line state machine structure.
132  struct Row {
133    explicit Row(bool DefaultIsStmt = false);
134
135    /// Called after a row is appended to the matrix.
136    void postAppend();
137    void reset(bool DefaultIsStmt);
138    void dump(raw_ostream &OS) const;
139
140    static void dumpTableHeader(raw_ostream &OS, unsigned Indent);
141
142    static bool orderByAddress(const Row &LHS, const Row &RHS) {
143      return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
144             std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
145    }
146
147    /// The program-counter value corresponding to a machine instruction
148    /// generated by the compiler and section index pointing to the section
149    /// containg this PC. If relocation information is present then section
150    /// index is the index of the section which contains above address.
151    /// Otherwise this is object::SectionedAddress::Undef value.
152    object::SectionedAddress Address;
153    /// An unsigned integer indicating a source line number. Lines are numbered
154    /// beginning at 1. The compiler may emit the value 0 in cases where an
155    /// instruction cannot be attributed to any source line.
156    uint32_t Line;
157    /// An unsigned integer indicating a column number within a source line.
158    /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
159    /// that a statement begins at the 'left edge' of the line.
160    uint16_t Column;
161    /// An unsigned integer indicating the identity of the source file
162    /// corresponding to a machine instruction.
163    uint16_t File;
164    /// An unsigned integer representing the DWARF path discriminator value
165    /// for this location.
166    uint32_t Discriminator;
167    /// An unsigned integer whose value encodes the applicable instruction set
168    /// architecture for the current instruction.
169    uint8_t Isa;
170    /// An unsigned integer representing the index of an operation within a
171    /// VLIW instruction. The index of the first operation is 0.
172    /// For non-VLIW architectures, this register will always be 0.
173    uint8_t OpIndex;
174    /// A boolean indicating that the current instruction is the beginning of a
175    /// statement.
176    uint8_t IsStmt : 1,
177        /// A boolean indicating that the current instruction is the
178        /// beginning of a basic block.
179        BasicBlock : 1,
180        /// A boolean indicating that the current address is that of the
181        /// first byte after the end of a sequence of target machine
182        /// instructions.
183        EndSequence : 1,
184        /// A boolean indicating that the current address is one (of possibly
185        /// many) where execution should be suspended for an entry breakpoint
186        /// of a function.
187        PrologueEnd : 1,
188        /// A boolean indicating that the current address is one (of possibly
189        /// many) where execution should be suspended for an exit breakpoint
190        /// of a function.
191        EpilogueBegin : 1;
192  };
193
194  /// Represents a series of contiguous machine instructions. Line table for
195  /// each compilation unit may consist of multiple sequences, which are not
196  /// guaranteed to be in the order of ascending instruction address.
197  struct Sequence {
198    Sequence();
199
200    /// Sequence describes instructions at address range [LowPC, HighPC)
201    /// and is described by line table rows [FirstRowIndex, LastRowIndex).
202    uint64_t LowPC;
203    uint64_t HighPC;
204    /// If relocation information is present then this is the index of the
205    /// section which contains above addresses. Otherwise this is
206    /// object::SectionedAddress::Undef value.
207    uint64_t SectionIndex;
208    unsigned FirstRowIndex;
209    unsigned LastRowIndex;
210    bool Empty;
211
212    void reset();
213
214    static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
215      return std::tie(LHS.SectionIndex, LHS.HighPC) <
216             std::tie(RHS.SectionIndex, RHS.HighPC);
217    }
218
219    bool isValid() const {
220      return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
221    }
222
223    bool containsPC(object::SectionedAddress PC) const {
224      return SectionIndex == PC.SectionIndex &&
225             (LowPC <= PC.Address && PC.Address < HighPC);
226    }
227  };
228
229  struct LineTable {
230    LineTable();
231
232    /// Represents an invalid row
233    const uint32_t UnknownRowIndex = UINT32_MAX;
234
235    void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
236
237    void appendSequence(const DWARFDebugLine::Sequence &S) {
238      Sequences.push_back(S);
239    }
240
241    /// Returns the index of the row with file/line info for a given address,
242    /// or UnknownRowIndex if there is no such row.
243    uint32_t lookupAddress(object::SectionedAddress Address) const;
244
245    bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
246                            std::vector<uint32_t> &Result) const;
247
248    bool hasFileAtIndex(uint64_t FileIndex) const {
249      return Prologue.hasFileAtIndex(FileIndex);
250    }
251
252    std::optional<uint64_t> getLastValidFileIndex() const {
253      return Prologue.getLastValidFileIndex();
254    }
255
256    /// Extracts filename by its index in filename table in prologue.
257    /// In Dwarf 4, the files are 1-indexed and the current compilation file
258    /// name is not represented in the list. In DWARF v5, the files are
259    /// 0-indexed and the primary source file has the index 0.
260    /// Returns true on success.
261    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
262                            DILineInfoSpecifier::FileLineInfoKind Kind,
263                            std::string &Result) const {
264      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
265    }
266
267    /// Fills the Result argument with the file and line information
268    /// corresponding to Address. Returns true on success.
269    bool getFileLineInfoForAddress(object::SectionedAddress Address,
270                                   const char *CompDir,
271                                   DILineInfoSpecifier::FileLineInfoKind Kind,
272                                   DILineInfo &Result) const;
273
274    /// Extracts directory name by its Entry in include directories table
275    /// in prologue. Returns true on success.
276    bool getDirectoryForEntry(const FileNameEntry &Entry,
277                              std::string &Directory) const;
278
279    void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
280    void clear();
281
282    /// Parse prologue and all rows.
283    Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
284                const DWARFContext &Ctx, const DWARFUnit *U,
285                function_ref<void(Error)> RecoverableErrorHandler,
286                raw_ostream *OS = nullptr, bool Verbose = false);
287
288    using RowVector = std::vector<Row>;
289    using RowIter = RowVector::const_iterator;
290    using SequenceVector = std::vector<Sequence>;
291    using SequenceIter = SequenceVector::const_iterator;
292
293    struct Prologue Prologue;
294    RowVector Rows;
295    SequenceVector Sequences;
296
297  private:
298    uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
299                          object::SectionedAddress Address) const;
300    std::optional<StringRef>
301    getSourceByIndex(uint64_t FileIndex,
302                     DILineInfoSpecifier::FileLineInfoKind Kind) const;
303
304    uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
305
306    bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
307                                std::vector<uint32_t> &Result) const;
308  };
309
  /// Returns a line table for \p Offset.
  /// NOTE(review): presumably a lookup in LineTableMap that yields nullptr
  /// when no table is stored for \p Offset; confirm against the definition.
  const LineTable *getLineTable(uint64_t Offset) const;
  /// Returns a line table for \p Offset, or an error.
  /// NOTE(review): judging by the name, a table that is not yet stored is
  /// parsed from \p DebugLineData, with recoverable problems reported through
  /// \p RecoverableErrorHandler; confirm against the definition.
  Expected<const LineTable *>
  getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
                      const DWARFContext &Ctx, const DWARFUnit *U,
                      function_ref<void(Error)> RecoverableErrorHandler);
  /// NOTE(review): presumably drops the table stored for \p Offset from
  /// LineTableMap; confirm against the definition.
  void clearLineTable(uint64_t Offset);
316
  /// Helper to allow for parsing of an entire .debug_line section in sequence.
  class SectionParser {
  public:
    /// Maps a 64-bit key to a unit.
    /// NOTE(review): the key is presumably the offset of the unit's line
    /// table within the section; confirm against the constructor.
    using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;

    /// \param Data - extractor the parser holds by reference, so it must
    /// outlive the parser.
    /// \param C - context the parser holds by reference, so it must outlive
    /// the parser.
    /// \param Units - NOTE(review): presumably used to populate LineToUnit;
    /// confirm against the definition.
    SectionParser(DWARFDataExtractor &Data, const DWARFContext &C,
                  DWARFUnitVector::iterator_range Units);

    /// Get the next line table from the section. Report any issues via the
    /// handlers.
    ///
    /// \param RecoverableErrorHandler - any issues that don't prevent further
    /// parsing of the table will be reported through this handler.
    /// \param UnrecoverableErrorHandler - any issues that prevent further
    /// parsing of the table will be reported through this handler.
    /// \param OS - if not null, the parser will print information about the
    /// table as it parses it.
    /// \param Verbose - if true, the parser will print verbose information when
    /// printing to the output.
    LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler,
                        function_ref<void(Error)> UnrecoverableErrorHandler,
                        raw_ostream *OS = nullptr, bool Verbose = false);

    /// Skip the current line table and go to the following line table (if
    /// present) immediately.
    ///
    /// \param RecoverableErrorHandler - report any recoverable prologue
    /// parsing issues via this handler.
    /// \param UnrecoverableErrorHandler - report any unrecoverable prologue
    /// parsing issues via this handler.
    void skip(function_ref<void(Error)> RecoverableErrorHandler,
              function_ref<void(Error)> UnrecoverableErrorHandler);

    /// Indicates if the parser has parsed as much as possible.
    ///
    /// \note Certain problems with the line table structure might mean that
    /// parsing stops before the end of the section is reached.
    bool done() const { return Done; }

    /// Get the offset the parser has reached.
    uint64_t getOffset() const { return Offset; }

  private:
    /// NOTE(review): presumably returns the unit associated with the table at
    /// \p Offset (possibly null); confirm against the definition.
    DWARFUnit *prepareToParse(uint64_t Offset);
    /// NOTE(review): presumably advances Offset past the table that started
    /// at \p OldOffset, using the lengths recorded in \p P, and updates Done;
    /// confirm against the definition.
    void moveToNextTable(uint64_t OldOffset, const Prologue &P);
    bool hasValidVersion(uint64_t Offset);

    LineToUnitMap LineToUnit;

    /// Held by reference; the referenced objects must outlive the parser.
    DWARFDataExtractor &DebugLineData;
    const DWARFContext &Context;
    /// Offset the parser has reached (reported by getOffset()); starts at 0.
    uint64_t Offset = 0;
    /// Reported by done(); starts out false.
    bool Done = false;
  };
371
372private:
  /// State carried while a line table is being parsed: the table being
  /// filled in, plus the current row and sequence.
  struct ParsingState {
    /// \param LT - the line table being parsed.
    /// \param TableOffset - NOTE(review): presumably the offset of that
    /// table, kept in LineTableOffset; confirm against the definition.
    /// \param ErrorHandler - callback that receives reported errors.
    ParsingState(struct LineTable *LT, uint64_t TableOffset,
                 function_ref<void(Error)> ErrorHandler);

    void resetRowAndSequence();
    void appendRowToMatrix();

    /// Result of advanceAddrOpIndex().
    struct AddrOpIndexDelta {
      uint64_t AddrOffset;
      int16_t OpIndexDelta;
    };

    /// Advance the address and op-index by the \p OperationAdvance value.
    /// \returns the amount advanced by.
    AddrOpIndexDelta advanceAddrOpIndex(uint64_t OperationAdvance,
                                        uint8_t Opcode, uint64_t OpcodeOffset);

    /// Result of advanceForOpcode().
    struct OpcodeAdvanceResults {
      uint64_t AddrDelta;
      int16_t OpIndexDelta;
      uint8_t AdjustedOpcode;
    };

    /// Advance the address and op-index as required by the specified \p Opcode.
    /// \returns the amount advanced by and the calculated adjusted opcode.
    OpcodeAdvanceResults advanceForOpcode(uint8_t Opcode,
                                          uint64_t OpcodeOffset);

    /// Result of handleSpecialOpcode().
    struct SpecialOpcodeDelta {
      uint64_t Address;
      int32_t Line;
      int16_t OpIndex;
    };

    /// Advance the line, address and op-index as required by the specified
    /// special \p Opcode. \returns the address, op-index and line delta.
    SpecialOpcodeDelta handleSpecialOpcode(uint8_t Opcode,
                                           uint64_t OpcodeOffset);

    /// Line table we're currently parsing.
    struct LineTable *LineTable;
    /// Current row and current sequence. The elaborated type specifiers are
    /// required because the members share their names with the types.
    struct Row Row;
    struct Sequence Sequence;

  private:
    uint64_t LineTableOffset;

    /// NOTE(review): both flags start out true; presumably each is cleared
    /// once its problem has been reported so it is not reported again --
    /// confirm against the definition.
    bool ReportAdvanceAddrProblem = true;
    bool ReportBadLineRange = true;
    function_ref<void(Error)> ErrorHandler;
  };
424
  /// Ordered map from a 64-bit key to a line table.
  /// NOTE(review): the key is presumably the table's offset, matching the
  /// Offset parameter of getLineTable(); confirm against the definition.
  using LineTableMapTy = std::map<uint64_t, LineTable>;
  using LineTableIter = LineTableMapTy::iterator;
  using LineTableConstIter = LineTableMapTy::const_iterator;

  /// The line tables owned by this object.
  LineTableMapTy LineTableMap;
430};
431
432} // end namespace llvm
433
434#endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
435