1// merge.h -- handle section merging for gold  -*- C++ -*-
2
3// Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23#ifndef GOLD_MERGE_H
24#define GOLD_MERGE_H
25
26#include <climits>
27#include <map>
28#include <vector>
29
30#include "stringpool.h"
31#include "output.h"
32
33namespace gold
34{
35
36class Merge_map;
37
38// For each object with merge sections, we store an Object_merge_map.
39// This is used to map locations in input sections to a merged output
40// section.  The output section itself is not recorded here--it can be
41// found in the output_sections_ field of the Object.
42
43class Object_merge_map
44{
45 public:
46  Object_merge_map()
47    : first_shnum_(-1U), first_map_(),
48      second_shnum_(-1U), second_map_(),
49      section_merge_maps_()
50  { }
51
52  ~Object_merge_map();
53
54  // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
55  // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
56  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
57  // discarded.  OUTPUT_OFFSET is relative to the start of the merged
58  // data in the output section.
59  void
60  add_mapping(const Merge_map*, unsigned int shndx, section_offset_type offset,
61	      section_size_type length, section_offset_type output_offset);
62
63  // Get the output offset for an input address.  MERGE_MAP is the map
64  // we are looking for, or NULL if we don't care.  The input address
65  // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
66  // to the offset in the output section; this will be -1 if the bytes
67  // are not being copied to the output.  This returns true if the
68  // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
69  // the start of the merged data in the output section.
70  bool
71  get_output_offset(const Merge_map*, unsigned int shndx,
72		    section_offset_type offset,
73		    section_offset_type* output_offset);
74
75  // Return whether this is the merge map for section SHNDX.
76  bool
77  is_merge_section_for(const Merge_map*, unsigned int shndx);
78
79  // Initialize an mapping from input offsets to output addresses for
80  // section SHNDX.  STARTING_ADDRESS is the output address of the
81  // merged section.
82  template<int size>
83  void
84  initialize_input_to_output_map(
85      unsigned int shndx,
86      typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
87      Unordered_map<section_offset_type,
88		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
89
90 private:
91  // Map input section offsets to a length and an output section
92  // offset.  An output section offset of -1 means that this part of
93  // the input section is being discarded.
94  struct Input_merge_entry
95  {
96    // The offset in the input section.
97    section_offset_type input_offset;
98    // The length.
99    section_size_type length;
100    // The offset in the output section.
101    section_offset_type output_offset;
102  };
103
104  // A less-than comparison routine for Input_merge_entry.
105  struct Input_merge_compare
106  {
107    bool
108    operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
109    { return i1.input_offset < i2.input_offset; }
110  };
111
112  // A list of entries for a particular input section.
113  struct Input_merge_map
114  {
115    typedef std::vector<Input_merge_entry> Entries;
116
117    // We store these with the Relobj, and we look them up by input
118    // section.  It is possible to have two different merge maps
119    // associated with a single output section.  For example, this
120    // happens routinely with .rodata, when merged string constants
121    // and merged fixed size constants are both put into .rodata.  The
122    // output offset that we store is not the offset from the start of
123    // the output section; it is the offset from the start of the
124    // merged data in the output section.  That means that the caller
125    // is going to add the offset of the merged data within the output
126    // section, which means that the caller needs to know which set of
127    // merged data it found the entry in.  So it's not enough to find
128    // this data based on the input section and the output section; we
129    // also have to find it based on a set of merged data in the
130    // output section.  In order to verify that we are looking at the
131    // right data, we store a pointer to the Merge_map here, and we
132    // pass in a pointer when looking at the data.  If we are asked to
133    // look up information for a different Merge_map, we report that
134    // we don't have it, rather than trying a lookup and returning an
135    // answer which will receive the wrong offset.
136    const Merge_map* merge_map;
137    // The list of mappings.
138    Entries entries;
139    // Whether the ENTRIES field is sorted by input_offset.
140    bool sorted;
141
142    Input_merge_map()
143      : merge_map(NULL), entries(), sorted(true)
144    { }
145  };
146
147  // Map input section indices to merge maps.
148  typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
149
150  // Return a pointer to the Input_merge_map to use for the input
151  // section SHNDX, or NULL.
152  Input_merge_map*
153  get_input_merge_map(unsigned int shndx);
154
155  // Get or make the the Input_merge_map to use for the section SHNDX
156  // with MERGE_MAP.
157  Input_merge_map*
158  get_or_make_input_merge_map(const Merge_map* merge_map, unsigned int shndx);
159
160  // Any given object file will normally only have a couple of input
161  // sections with mergeable contents.  So we keep the first two input
162  // section numbers inline, and push any further ones into a map.  A
163  // value of -1U in first_shnum_ or second_shnum_ means that we don't
164  // have a corresponding entry.
165  unsigned int first_shnum_;
166  Input_merge_map first_map_;
167  unsigned int second_shnum_;
168  Input_merge_map second_map_;
169  Section_merge_maps section_merge_maps_;
170};
171
172// This class manages mappings from input sections to offsets in an
173// output section.  This is used where input sections are merged.  The
174// actual data is stored in fields in Object.
175
176class Merge_map
177{
178 public:
179  Merge_map()
180  { }
181
182  // Add a mapping for the bytes from OFFSET to OFFSET + LENGTH in the
183  // input section SHNDX in object OBJECT to OUTPUT_OFFSET in the
184  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
185  // discarded.  OUTPUT_OFFSET is not the offset from the start of the
186  // output section, it is the offset from the start of the merged
187  // data within the output section.
188  void
189  add_mapping(Relobj* object, unsigned int shndx,
190	      section_offset_type offset, section_size_type length,
191	      section_offset_type output_offset);
192
193  // Return the output offset for an input address.  The input address
194  // is at offset OFFSET in section SHNDX in OBJECT.  This sets
195  // *OUTPUT_OFFSET to the offset in the output section; this will be
196  // -1 if the bytes are not being copied to the output.  This returns
197  // true if the mapping is known, false otherwise.  This returns the
198  // value stored by add_mapping, namely the offset from the start of
199  // the merged data within the output section.
200  bool
201  get_output_offset(const Relobj* object, unsigned int shndx,
202		    section_offset_type offset,
203		    section_offset_type* output_offset) const;
204
205  // Return whether this is the merge mapping for section SHNDX in
206  // OBJECT.  This should return true when get_output_offset would
207  // return true for some input offset.
208  bool
209  is_merge_section_for(const Relobj* object, unsigned int shndx) const;
210};
211
212// A general class for SHF_MERGE data, to hold functions shared by
213// fixed-size constant data and string data.
214
215class Output_merge_base : public Output_section_data
216{
217 public:
218  Output_merge_base(uint64_t entsize, uint64_t addralign)
219    : Output_section_data(addralign), merge_map_(), entsize_(entsize),
220      keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
221      input_sections_()
222  { }
223
224  // Return the entry size.
225  uint64_t
226  entsize() const
227  { return this->entsize_; }
228
229  // Whether this is a merge string section.  This is only true of
230  // Output_merge_string.
231  bool
232  is_string()
233  { return this->do_is_string(); }
234
235  // Whether this keeps input sections.
236  bool
237  keeps_input_sections() const
238  { return this->keeps_input_sections_; }
239
240  // Set the keeps-input-sections flag.  This is virtual so that sub-classes
241  // can perform additional checks.
242  void
243  set_keeps_input_sections()
244  { this->do_set_keeps_input_sections(); }
245
246  // Return the object of the first merged input section.  This used
247  // for script processing.  This is NULL if merge section is empty.
248  Relobj*
249  first_relobj() const
250  { return this->first_relobj_; }
251
252  // Return the section index of the first merged input section.  This
253  // is used for script processing.  This is valid only if merge section
254  // is not valid.
255  unsigned int
256  first_shndx() const
257  {
258    gold_assert(this->first_relobj_ != NULL);
259    return this->first_shndx_;
260  }
261
262  // Set of merged input sections.
263  typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
264
265  // Beginning of merged input sections.
266  Input_sections::const_iterator
267  input_sections_begin() const
268  {
269    gold_assert(this->keeps_input_sections_);
270    return this->input_sections_.begin();
271  }
272
273  // Beginning of merged input sections.
274  Input_sections::const_iterator
275  input_sections_end() const
276  {
277    gold_assert(this->keeps_input_sections_);
278    return this->input_sections_.end();
279  }
280
281 protected:
282  // Return the output offset for an input offset.
283  bool
284  do_output_offset(const Relobj* object, unsigned int shndx,
285		   section_offset_type offset,
286		   section_offset_type* poutput) const;
287
288  // Return whether this is the merge section for an input section.
289  bool
290  do_is_merge_section_for(const Relobj*, unsigned int shndx) const;
291
292  // Add a mapping from an OFFSET in input section SHNDX in object
293  // OBJECT to an OUTPUT_OFFSET in the output section.  OUTPUT_OFFSET
294  // is the offset from the start of the merged data in the output
295  // section.
296  void
297  add_mapping(Relobj* object, unsigned int shndx, section_offset_type offset,
298	      section_size_type length, section_offset_type output_offset)
299  {
300    this->merge_map_.add_mapping(object, shndx, offset, length, output_offset);
301  }
302
303  // This may be overriden by the child class.
304  virtual bool
305  do_is_string()
306  { return false; }
307
308  // This may be overridden by the child class.
309  virtual void
310  do_set_keeps_input_sections()
311  { this->keeps_input_sections_ = true; }
312
313  // Record the merged input section for script processing.
314  void
315  record_input_section(Relobj* relobj, unsigned int shndx);
316
317 private:
318  // A mapping from input object/section/offset to offset in output
319  // section.
320  Merge_map merge_map_;
321  // The entry size.  For fixed-size constants, this is the size of
322  // the constants.  For strings, this is the size of a character.
323  uint64_t entsize_;
324  // Whether we keep input sections.
325  bool keeps_input_sections_;
326  // Object of the first merged input section.  We use this for script
327  // processing.
328  Relobj* first_relobj_;
329  // Section index of the first merged input section.
330  unsigned int first_shndx_;
331  // Input sections.  We only keep them is keeps_input_sections_ is true.
332  Input_sections input_sections_;
333};
334
335// Handle SHF_MERGE sections with fixed-size constant data.
336
337class Output_merge_data : public Output_merge_base
338{
339 public:
340  Output_merge_data(uint64_t entsize, uint64_t addralign)
341    : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
342      input_count_(0),
343      hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
344  { }
345
346 protected:
347  // Add an input section.
348  bool
349  do_add_input_section(Relobj* object, unsigned int shndx);
350
351  // Set the final data size.
352  void
353  set_final_data_size();
354
355  // Write the data to the file.
356  void
357  do_write(Output_file*);
358
359  // Write the data to a buffer.
360  void
361  do_write_to_buffer(unsigned char*);
362
363  // Write to a map file.
364  void
365  do_print_to_mapfile(Mapfile* mapfile) const
366  { mapfile->print_output_data(this, _("** merge constants")); }
367
368  // Print merge stats to stderr.
369  void
370  do_print_merge_stats(const char* section_name);
371
372  // Set keeps-input-sections flag.
373  void
374  do_set_keeps_input_sections()
375  {
376    gold_assert(this->input_count_ == 0);
377    Output_merge_base::do_set_keeps_input_sections();
378  }
379
380 private:
381  // We build a hash table of the fixed-size constants.  Each constant
382  // is stored as a pointer into the section data we are accumulating.
383
384  // A key in the hash table.  This is an offset in the section
385  // contents we are building.
386  typedef section_offset_type Merge_data_key;
387
388  // Compute the hash code.  To do this we need a pointer back to the
389  // object holding the data.
390  class Merge_data_hash
391  {
392   public:
393    Merge_data_hash(const Output_merge_data* pomd)
394      : pomd_(pomd)
395    { }
396
397    size_t
398    operator()(Merge_data_key) const;
399
400   private:
401    const Output_merge_data* pomd_;
402  };
403
404  friend class Merge_data_hash;
405
406  // Compare two entries in the hash table for equality.  To do this
407  // we need a pointer back to the object holding the data.  Note that
408  // we now have a pointer to the object stored in two places in the
409  // hash table.  Fixing this would require specializing the hash
410  // table, which would be hard to do portably.
411  class Merge_data_eq
412  {
413   public:
414    Merge_data_eq(const Output_merge_data* pomd)
415      : pomd_(pomd)
416    { }
417
418    bool
419    operator()(Merge_data_key k1, Merge_data_key k2) const;
420
421   private:
422    const Output_merge_data* pomd_;
423  };
424
425  friend class Merge_data_eq;
426
427  // The type of the hash table.
428  typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
429    Merge_data_hashtable;
430
431  // Given a hash table key, which is just an offset into the section
432  // data, return a pointer to the corresponding constant.
433  const unsigned char*
434  constant(Merge_data_key k) const
435  {
436    gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
437    return this->p_ + k;
438  }
439
440  // Add a constant to the output.
441  void
442  add_constant(const unsigned char*);
443
444  // The accumulated data.
445  unsigned char* p_;
446  // The length of the accumulated data.
447  section_size_type len_;
448  // The size of the allocated buffer.
449  section_size_type alc_;
450  // The number of entries seen in input files.
451  size_t input_count_;
452  // The hash table.
453  Merge_data_hashtable hashtable_;
454};
455
456// Handle SHF_MERGE sections with string data.  This is a template
457// based on the type of the characters in the string.
458
459template<typename Char_type>
460class Output_merge_string : public Output_merge_base
461{
462 public:
463  Output_merge_string(uint64_t addralign)
464    : Output_merge_base(sizeof(Char_type), addralign), stringpool_(),
465      merged_strings_lists_(), input_count_(0), input_size_(0)
466  {
467    gold_assert(addralign <= sizeof(Char_type));
468    this->stringpool_.set_no_zero_null();
469  }
470
471 protected:
472  // Add an input section.
473  bool
474  do_add_input_section(Relobj* object, unsigned int shndx);
475
476  // Do all the final processing after the input sections are read in.
477  // Returns the final data size.
478  section_size_type
479  finalize_merged_data();
480
481  // Set the final data size.
482  void
483  set_final_data_size();
484
485  // Write the data to the file.
486  void
487  do_write(Output_file*);
488
489  // Write the data to a buffer.
490  void
491  do_write_to_buffer(unsigned char*);
492
493  // Write to a map file.
494  void
495  do_print_to_mapfile(Mapfile* mapfile) const
496  { mapfile->print_output_data(this, _("** merge strings")); }
497
498  // Print merge stats to stderr.
499  void
500  do_print_merge_stats(const char* section_name);
501
502  // Writes the stringpool to a buffer.
503  void
504  stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
505  { this->stringpool_.write_to_buffer(buffer, buffer_size); }
506
507  // Clears all the data in the stringpool, to save on memory.
508  void
509  clear_stringpool()
510  { this->stringpool_.clear(); }
511
512  // Whether this is a merge string section.
513  virtual bool
514  do_is_string()
515  { return true; }
516
517  // Set keeps-input-sections flag.
518  void
519  do_set_keeps_input_sections()
520  {
521    gold_assert(this->input_count_ == 0);
522    Output_merge_base::do_set_keeps_input_sections();
523  }
524
525 private:
526  // The name of the string type, for stats.
527  const char*
528  string_name();
529
530  // As we see input sections, we build a mapping from object, section
531  // index and offset to strings.
532  struct Merged_string
533  {
534    // The offset in the input section.
535    section_offset_type offset;
536    // The key in the Stringpool.
537    Stringpool::Key stringpool_key;
538
539    Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
540      : offset(offseta), stringpool_key(stringpool_keya)
541    { }
542  };
543
544  typedef std::vector<Merged_string> Merged_strings;
545
546  struct Merged_strings_list
547  {
548    // The input object where the strings were found.
549    Relobj* object;
550    // The input section in the input object.
551    unsigned int shndx;
552    // The list of merged strings.
553    Merged_strings merged_strings;
554
555    Merged_strings_list(Relobj* objecta, unsigned int shndxa)
556      : object(objecta), shndx(shndxa), merged_strings()
557    { }
558  };
559
560  typedef std::vector<Merged_strings_list*> Merged_strings_lists;
561
562  // As we see the strings, we add them to a Stringpool.
563  Stringpool_template<Char_type> stringpool_;
564  // Map from a location in an input object to an entry in the
565  // Stringpool.
566  Merged_strings_lists merged_strings_lists_;
567  // The number of entries seen in input files.
568  size_t input_count_;
569  // The total size of input sections.
570  size_t input_size_;
571};
572
573} // End namespace gold.
574
575#endif // !defined(GOLD_MERGE_H)
576