1/* DIE indexing
2
3   Copyright (C) 2022-2023 Free Software Foundation, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#ifndef GDB_DWARF2_COOKED_INDEX_H
21#define GDB_DWARF2_COOKED_INDEX_H
22
23#include "dwarf2.h"
24#include "gdbtypes.h"
25#include "symtab.h"
26#include "hashtab.h"
27#include "dwarf2/index-common.h"
28#include "gdbsupport/gdb_string_view.h"
29#include "quick-symbol.h"
30#include "gdbsupport/gdb_obstack.h"
31#include "addrmap.h"
32#include "gdbsupport/iterator-range.h"
33#include "gdbsupport/thread-pool.h"
34#include "dwarf2/mapped-index.h"
35#include "dwarf2/tag.h"
36#include "gdbsupport/range-chain.h"
37
38struct dwarf2_per_cu_data;
39
40/* Flags that describe an entry in the index.  */
41enum cooked_index_flag_enum : unsigned char
42{
43  /* True if this entry is the program's "main".  */
44  IS_MAIN = 1,
45  /* True if this entry represents a "static" object.  */
46  IS_STATIC = 2,
47  /* True if this entry is an "enum class".  */
48  IS_ENUM_CLASS = 4,
49  /* True if this entry uses the linkage name.  */
50  IS_LINKAGE = 8,
51  /* True if this entry is just for the declaration of a type, not the
52     definition.  */
53  IS_TYPE_DECLARATION = 16,
54};
55DEF_ENUM_FLAGS_TYPE (enum cooked_index_flag_enum, cooked_index_flag);
56
57/* Return true if LANG requires canonicalization.  This is used
58   primarily to work around an issue computing the name of "main".
59   This function must be kept in sync with
60   cooked_index_shard::do_finalize.  */
61
62extern bool language_requires_canonicalization (enum language lang);
63
64/* A cooked_index_entry represents a single item in the index.  Note
65   that two entries can be created for the same DIE -- one using the
66   name, and another one using the linkage name, if any.
67
68   This is an "open" class and the members are all directly
69   accessible.  It is read-only after the index has been fully read
70   and processed.  */
71struct cooked_index_entry : public allocate_on_obstack
72{
73  cooked_index_entry (sect_offset die_offset_, enum dwarf_tag tag_,
74		      cooked_index_flag flags_, const char *name_,
75		      const cooked_index_entry *parent_entry_,
76		      dwarf2_per_cu_data *per_cu_)
77    : name (name_),
78      tag (tag_),
79      flags (flags_),
80      die_offset (die_offset_),
81      parent_entry (parent_entry_),
82      per_cu (per_cu_)
83  {
84  }
85
86  /* Return true if this entry matches SEARCH_FLAGS.  */
87  bool matches (block_search_flags search_flags) const
88  {
89    /* Just reject type declarations.  */
90    if ((flags & IS_TYPE_DECLARATION) != 0)
91      return false;
92
93    if ((search_flags & SEARCH_STATIC_BLOCK) != 0
94	&& (flags & IS_STATIC) != 0)
95      return true;
96    if ((search_flags & SEARCH_GLOBAL_BLOCK) != 0
97	&& (flags & IS_STATIC) == 0)
98      return true;
99    return false;
100  }
101
102  /* Return true if this entry matches DOMAIN.  */
103  bool matches (domain_enum domain) const
104  {
105    /* Just reject type declarations.  */
106    if ((flags & IS_TYPE_DECLARATION) != 0)
107      return false;
108
109    switch (domain)
110      {
111      case LABEL_DOMAIN:
112	return false;
113
114      case MODULE_DOMAIN:
115	return tag == DW_TAG_module;
116
117      case COMMON_BLOCK_DOMAIN:
118	return tag == DW_TAG_common_block;
119      }
120
121    return true;
122  }
123
124  /* Return true if this entry matches KIND.  */
125  bool matches (enum search_domain kind) const
126  {
127    /* Just reject type declarations.  */
128    if ((flags & IS_TYPE_DECLARATION) != 0)
129      return false;
130
131    switch (kind)
132      {
133      case VARIABLES_DOMAIN:
134	return (tag == DW_TAG_variable
135		|| tag == DW_TAG_constant
136		|| tag == DW_TAG_enumerator);
137      case FUNCTIONS_DOMAIN:
138	return tag == DW_TAG_subprogram;
139      case TYPES_DOMAIN:
140	return tag_is_type (tag);
141      case MODULES_DOMAIN:
142	return tag == DW_TAG_module;
143      }
144
145    return true;
146  }
147
148  /* Construct the fully-qualified name of this entry and return a
149     pointer to it.  If allocation is needed, it will be done on
150     STORAGE.  FOR_MAIN is true if we are computing the name of the
151     "main" entry -- one marked DW_AT_main_subprogram.  This matters
152     for avoiding name canonicalization and also a related race (if
153     "main" computation is done during finalization).  */
154  const char *full_name (struct obstack *storage, bool for_main = false) const;
155
156  /* Comparison modes for the 'compare' function.  See the function
157     for a description.  */
158  enum comparison_mode
159  {
160    MATCH,
161    SORT,
162    COMPLETE,
163  };
164
165  /* Compare two strings, case-insensitively.  Return -1 if STRA is
166     less than STRB, 0 if they are equal, and 1 if STRA is greater.
167
168     When comparing, '<' is considered to be less than all other
169     printable characters.  This ensures that "t<x>" sorts before
170     "t1", which is necessary when looking up "t".  This '<' handling
171     is to ensure that certain C++ lookups work correctly.  It is
172     inexact, and applied regardless of the search language, but this
173     is ok because callers of this code do more precise filtering
174     according to their needs.  This is also why using a
175     case-insensitive comparison works even for languages that are
176     case sensitive.
177
178     MODE controls how the comparison proceeds.
179
180     MODE==SORT is used when sorting and the only special '<' handling
181     that it does is to ensure that '<' sorts before all other
182     printable characters.  This ensures that the resulting ordering
183     will be binary-searchable.
184
185     MODE==MATCH is used when searching for a symbol.  In this case,
186     STRB must always be the search name, and STRA must be the name in
187     the index that is under consideration.  In compare mode, early
188     termination of STRB may match STRA -- for example, "t<int>" and
189     "t" will be considered to be equal.  (However, if A=="t" and
190     B=="t<int>", then this will not consider them as equal.)
191
192     MODE==COMPLETE is used when searching for a symbol for
193     completion.  In this case, STRB must always be the search name,
194     and STRA must be the name in the index that is under
195     consideration.  In completion mode, early termination of STRB
196     always results in a match.  */
197  static int compare (const char *stra, const char *strb,
198		      comparison_mode mode);
199
200  /* Compare two entries by canonical name.  */
201  bool operator< (const cooked_index_entry &other) const
202  {
203    return compare (canonical, other.canonical, SORT) < 0;
204  }
205
206  /* The name as it appears in DWARF.  This always points into one of
207     the mapped DWARF sections.  Note that this may be the name or the
208     linkage name -- two entries are created for DIEs which have both
209     attributes.  */
210  const char *name;
211  /* The canonical name.  For C++ names, this may differ from NAME.
212     In all other cases, this is equal to NAME.  */
213  const char *canonical = nullptr;
214  /* The DWARF tag.  */
215  enum dwarf_tag tag;
216  /* Any flags attached to this entry.  */
217  cooked_index_flag flags;
218  /* The offset of this DIE.  */
219  sect_offset die_offset;
220  /* The parent entry.  This is NULL for top-level entries.
221     Otherwise, it points to the parent entry, such as a namespace or
222     class.  */
223  const cooked_index_entry *parent_entry;
224  /* The CU from which this entry originates.  */
225  dwarf2_per_cu_data *per_cu;
226
227private:
228
229  /* A helper method for full_name.  Emits the full scope of this
230     object, followed by the separator, to STORAGE.  If this entry has
231     a parent, its write_scope method is called first.  */
232  void write_scope (struct obstack *storage, const char *sep,
233		    bool for_name) const;
234};
235
236class cooked_index_vector;
237
238/* An index of interesting DIEs.  This is "cooked", in contrast to a
239   mapped .debug_names or .gdb_index, which are "raw".  An entry in
240   the index is of type cooked_index_entry.
241
242   Operations on the index are described below.  They are chosen to
243   make it relatively simple to implement the symtab "quick"
244   methods.  */
245class cooked_index
246{
247public:
248  cooked_index () = default;
249  DISABLE_COPY_AND_ASSIGN (cooked_index);
250
251  /* Create a new cooked_index_entry and register it with this object.
252     Entries are owned by this object.  The new item is returned.  */
253  const cooked_index_entry *add (sect_offset die_offset, enum dwarf_tag tag,
254				 cooked_index_flag flags,
255				 const char *name,
256				 const cooked_index_entry *parent_entry,
257				 dwarf2_per_cu_data *per_cu);
258
259  /* Install a new fixed addrmap from the given mutable addrmap.  */
260  void install_addrmap (addrmap_mutable *map)
261  {
262    gdb_assert (m_addrmap == nullptr);
263    m_addrmap = new (&m_storage) addrmap_fixed (&m_storage, map);
264  }
265
266  /* Finalize the index.  This should be called a single time, when
267     the index has been fully populated.  It enters all the entries
268     into the internal table.  */
269  void finalize ();
270
271  /* Wait for this index's finalization to be complete.  */
272  void wait ()
273  {
274    m_future.wait ();
275  }
276
277  friend class cooked_index_vector;
278
279  /* A simple range over part of m_entries.  */
280  typedef iterator_range<std::vector<cooked_index_entry *>::iterator> range;
281
282  /* Return a range of all the entries.  */
283  range all_entries ()
284  {
285    wait ();
286    return { m_entries.begin (), m_entries.end () };
287  }
288
289  /* Look up an entry by name.  Returns a range of all matching
290     results.  If COMPLETING is true, then a larger range, suitable
291     for completion, will be returned.  */
292  range find (const std::string &name, bool completing);
293
294private:
295
296  /* Return the entry that is believed to represent the program's
297     "main".  This will return NULL if no such entry is available.  */
298  const cooked_index_entry *get_main () const
299  {
300    return m_main;
301  }
302
303  /* Look up ADDR in the address map, and return either the
304     corresponding CU, or nullptr if the address could not be
305     found.  */
306  dwarf2_per_cu_data *lookup (CORE_ADDR addr)
307  {
308    return (dwarf2_per_cu_data *) m_addrmap->find (addr);
309  }
310
311  /* Create a new cooked_index_entry and register it with this object.
312     Entries are owned by this object.  The new item is returned.  */
313  cooked_index_entry *create (sect_offset die_offset,
314			      enum dwarf_tag tag,
315			      cooked_index_flag flags,
316			      const char *name,
317			      const cooked_index_entry *parent_entry,
318			      dwarf2_per_cu_data *per_cu)
319  {
320    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
321						name, parent_entry,
322						per_cu);
323  }
324
325  /* GNAT only emits mangled ("encoded") names in the DWARF, and does
326     not emit the module structure.  However, we need this structure
327     to do lookups.  This function recreates that structure for an
328     existing entry.  It returns the base name (last element) of the
329     full decoded name.  */
330  gdb::unique_xmalloc_ptr<char> handle_gnat_encoded_entry
331       (cooked_index_entry *entry, htab_t gnat_entries);
332
333  /* A helper method that does the work of 'finalize'.  */
334  void do_finalize ();
335
336  /* Storage for the entries.  */
337  auto_obstack m_storage;
338  /* List of all entries.  */
339  std::vector<cooked_index_entry *> m_entries;
340  /* If we found an entry with 'is_main' set, store it here.  */
341  cooked_index_entry *m_main = nullptr;
342  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
343     objects.  */
344  addrmap *m_addrmap = nullptr;
345  /* Storage for canonical names.  */
346  std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
347  /* A future that tracks when the 'finalize' method is done.  Note
348     that the 'get' method is never called on this future, only
349     'wait'.  */
350  gdb::future<void> m_future;
351};
352
353/* The main index of DIEs.  The parallel DIE indexers create
354   cooked_index objects.  Then, these are all handled to a
355   cooked_index_vector for storage and final indexing.  The index is
356   made by iterating over the entries previously created.  */
357
358class cooked_index_vector : public dwarf_scanner_base
359{
360public:
361
362  /* A convenience typedef for the vector that is contained in this
363     object.  */
364  typedef std::vector<std::unique_ptr<cooked_index>> vec_type;
365
366  explicit cooked_index_vector (vec_type &&vec);
367  DISABLE_COPY_AND_ASSIGN (cooked_index_vector);
368
369  /* Wait until the finalization of the entire cooked_index_vector is
370     done.  */
371  void wait ()
372  {
373    for (auto &item : m_vector)
374      item->wait ();
375  }
376
377  ~cooked_index_vector ()
378  {
379    /* The 'finalize' methods may be run in a different thread.  If
380       this object is destroyed before these complete, then one will
381       end up writing to freed memory.  Waiting for finalization to
382       complete avoids this problem; and the cost seems ignorable
383       because creating and immediately destroying the debug info is a
384       relatively rare thing to do.  */
385    wait ();
386  }
387
388  /* A range over a vector of subranges.  */
389  typedef range_chain<cooked_index::range> range;
390
391  /* Look up an entry by name.  Returns a range of all matching
392     results.  If COMPLETING is true, then a larger range, suitable
393     for completion, will be returned.  */
394  range find (const std::string &name, bool completing);
395
396  /* Return a range of all the entries.  */
397  range all_entries ()
398  {
399    std::vector<cooked_index::range> result_range;
400    result_range.reserve (m_vector.size ());
401    for (auto &entry : m_vector)
402      result_range.push_back (entry->all_entries ());
403    return range (std::move (result_range));
404  }
405
406  /* Look up ADDR in the address map, and return either the
407     corresponding CU, or nullptr if the address could not be
408     found.  */
409  dwarf2_per_cu_data *lookup (CORE_ADDR addr);
410
411  /* Return a new vector of all the addrmaps used by all the indexes
412     held by this object.  */
413  std::vector<addrmap *> get_addrmaps ();
414
415  /* Return the entry that is believed to represent the program's
416     "main".  This will return NULL if no such entry is available.  */
417  const cooked_index_entry *get_main () const;
418
419  cooked_index_vector *index_for_writing () override
420  {
421    return this;
422  }
423
424  quick_symbol_functions_up make_quick_functions () const override;
425
426private:
427
428  /* The vector of cooked_index objects.  This is stored because the
429     entries are stored on the obstacks in those objects.  */
430  vec_type m_vector;
431};
432
433#endif /* GDB_DWARF2_COOKED_INDEX_H */
434