1// stringpool.cc -- a string pool for gold
2
3// Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23#include "gold.h"
24
25#include <cstring>
26#include <algorithm>
27#include <vector>
28
29#include "output.h"
30#include "parameters.h"
31#include "stringpool.h"
32
33namespace gold
34{
35
36template<typename Stringpool_char>
37Stringpool_template<Stringpool_char>::Stringpool_template()
38  : string_set_(), key_to_offset_(), strings_(), strtab_size_(0),
39    zero_null_(true), optimize_(false), offset_(sizeof(Stringpool_char))
40{
41  if (parameters->options_valid() && parameters->options().optimize() >= 2)
42    this->optimize_ = true;
43}
44
45template<typename Stringpool_char>
46void
47Stringpool_template<Stringpool_char>::clear()
48{
49  for (typename std::list<Stringdata*>::iterator p = this->strings_.begin();
50       p != this->strings_.end();
51       ++p)
52    delete[] reinterpret_cast<char*>(*p);
53  this->strings_.clear();
54  this->key_to_offset_.clear();
55  this->string_set_.clear();
56}
57
58template<typename Stringpool_char>
59Stringpool_template<Stringpool_char>::~Stringpool_template()
60{
61  this->clear();
62}
63
64// Resize the internal hashtable with the expectation we'll get n new
65// elements.  Note that the hashtable constructor takes a "number of
66// buckets you'd like," rather than "number of elements you'd like,"
67// but that's the best we can do.
68
69template<typename Stringpool_char>
70void
71Stringpool_template<Stringpool_char>::reserve(unsigned int n)
72{
73  this->key_to_offset_.reserve(n);
74
75#if defined(HAVE_TR1_UNORDERED_MAP)
76  // rehash() implementation is broken in gcc 4.0.3's stl
77  //this->string_set_.rehash(this->string_set_.size() + n);
78  //return;
79#elif defined(HAVE_EXT_HASH_MAP)
80  this->string_set_.resize(this->string_set_.size() + n);
81  return;
82#endif
83
84  // This is the generic "reserve" code, if no #ifdef above triggers.
85  String_set_type new_string_set(this->string_set_.size() + n);
86  new_string_set.insert(this->string_set_.begin(), this->string_set_.end());
87  this->string_set_.swap(new_string_set);
88}
89
90// Compare two strings of arbitrary character type for equality.
91
92template<typename Stringpool_char>
93bool
94Stringpool_template<Stringpool_char>::string_equal(const Stringpool_char* s1,
95						   const Stringpool_char* s2)
96{
97  while (*s1 != 0)
98    if (*s1++ != *s2++)
99      return false;
100  return *s2 == 0;
101}
102
103// Specialize string_equal for char.
104
105template<>
106inline bool
107Stringpool_template<char>::string_equal(const char* s1, const char* s2)
108{
109  return strcmp(s1, s2) == 0;
110}
111
112// Equality comparison function for the hash table.
113
114template<typename Stringpool_char>
115inline bool
116Stringpool_template<Stringpool_char>::Stringpool_eq::operator()(
117    const Hashkey& h1,
118    const Hashkey& h2) const
119{
120  return (h1.hash_code == h2.hash_code
121	  && h1.length == h2.length
122	  && (h1.string == h2.string
123	      || memcmp(h1.string, h2.string,
124			h1.length * sizeof(Stringpool_char)) == 0));
125}
126
127// Hash function.  The length is in characters, not bytes.
128
129template<typename Stringpool_char>
130size_t
131Stringpool_template<Stringpool_char>::string_hash(const Stringpool_char* s,
132						  size_t length)
133{
134  return gold::string_hash<Stringpool_char>(s, length);
135}
136
137// Add the string S to the list of canonical strings.  Return a
138// pointer to the canonical string.  If PKEY is not NULL, set *PKEY to
139// the key.  LENGTH is the length of S in characters.  Note that S may
140// not be NUL terminated.
141
142template<typename Stringpool_char>
143const Stringpool_char*
144Stringpool_template<Stringpool_char>::add_string(const Stringpool_char* s,
145						 size_t len)
146{
147  // We are in trouble if we've already computed the string offsets.
148  gold_assert(this->strtab_size_ == 0);
149
150  // The size we allocate for a new Stringdata.
151  const size_t buffer_size = 1000;
152  // The amount we multiply the Stringdata index when calculating the
153  // key.
154  const size_t key_mult = 1024;
155  gold_assert(key_mult >= buffer_size);
156
157  // Convert len to the number of bytes we need to allocate, including
158  // the null character.
159  len = (len + 1) * sizeof(Stringpool_char);
160
161  size_t alc;
162  bool front = true;
163  if (len > buffer_size)
164    {
165      alc = sizeof(Stringdata) + len;
166      front = false;
167    }
168  else if (this->strings_.empty())
169    alc = sizeof(Stringdata) + buffer_size;
170  else
171    {
172      Stringdata* psd = this->strings_.front();
173      if (len > psd->alc - psd->len)
174	alc = sizeof(Stringdata) + buffer_size;
175      else
176	{
177	  char* ret = psd->data + psd->len;
178	  memcpy(ret, s, len - sizeof(Stringpool_char));
179	  memset(ret + len - sizeof(Stringpool_char), 0,
180		 sizeof(Stringpool_char));
181
182	  psd->len += len;
183
184	  return reinterpret_cast<const Stringpool_char*>(ret);
185	}
186    }
187
188  Stringdata* psd = reinterpret_cast<Stringdata*>(new char[alc]);
189  psd->alc = alc - sizeof(Stringdata);
190  memcpy(psd->data, s, len - sizeof(Stringpool_char));
191  memset(psd->data + len - sizeof(Stringpool_char), 0,
192	 sizeof(Stringpool_char));
193  psd->len = len;
194
195  if (front)
196    this->strings_.push_front(psd);
197  else
198    this->strings_.push_back(psd);
199
200  return reinterpret_cast<const Stringpool_char*>(psd->data);
201}
202
203// Add a string to a string pool.
204
205template<typename Stringpool_char>
206const Stringpool_char*
207Stringpool_template<Stringpool_char>::add(const Stringpool_char* s, bool copy,
208                                          Key* pkey)
209{
210  return this->add_with_length(s, string_length(s), copy, pkey);
211}
212
213// Add a new key offset entry.
214
215template<typename Stringpool_char>
216void
217Stringpool_template<Stringpool_char>::new_key_offset(size_t length)
218{
219  section_offset_type offset;
220  if (this->zero_null_ && length == 0)
221    offset = 0;
222  else
223    {
224      offset = this->offset_;
225      this->offset_ += (length + 1) * sizeof(Stringpool_char);
226    }
227  this->key_to_offset_.push_back(offset);
228}
229
230template<typename Stringpool_char>
231const Stringpool_char*
232Stringpool_template<Stringpool_char>::add_with_length(const Stringpool_char* s,
233						      size_t length,
234						      bool copy,
235						      Key* pkey)
236{
237  typedef std::pair<typename String_set_type::iterator, bool> Insert_type;
238
239  // We add 1 so that 0 is always invalid.
240  const Key k = this->key_to_offset_.size() + 1;
241
242  if (!copy)
243    {
244      // When we don't need to copy the string, we can call insert
245      // directly.
246
247      std::pair<Hashkey, Hashval> element(Hashkey(s, length), k);
248
249      Insert_type ins = this->string_set_.insert(element);
250
251      typename String_set_type::const_iterator p = ins.first;
252
253      if (ins.second)
254	{
255	  // We just added the string.  The key value has now been
256	  // used.
257	  this->new_key_offset(length);
258	}
259      else
260	{
261	  gold_assert(k != p->second);
262	}
263
264      if (pkey != NULL)
265	*pkey = p->second;
266      return p->first.string;
267    }
268
269  // When we have to copy the string, we look it up twice in the hash
270  // table.  The problem is that we can't insert S before we
271  // canonicalize it by copying it into the canonical list. The hash
272  // code will only be computed once.
273
274  Hashkey hk(s, length);
275  typename String_set_type::const_iterator p = this->string_set_.find(hk);
276  if (p != this->string_set_.end())
277    {
278      if (pkey != NULL)
279	*pkey = p->second;
280      return p->first.string;
281    }
282
283  this->new_key_offset(length);
284
285  hk.string = this->add_string(s, length);
286  // The contents of the string stay the same, so we don't need to
287  // adjust hk.hash_code or hk.length.
288
289  std::pair<Hashkey, Hashval> element(hk, k);
290
291  Insert_type ins = this->string_set_.insert(element);
292  gold_assert(ins.second);
293
294  if (pkey != NULL)
295    *pkey = k;
296  return hk.string;
297}
298
299template<typename Stringpool_char>
300const Stringpool_char*
301Stringpool_template<Stringpool_char>::find(const Stringpool_char* s,
302					   Key* pkey) const
303{
304  Hashkey hk(s);
305  typename String_set_type::const_iterator p = this->string_set_.find(hk);
306  if (p == this->string_set_.end())
307    return NULL;
308
309  if (pkey != NULL)
310    *pkey = p->second;
311
312  return p->first.string;
313}
314
315// Comparison routine used when sorting into an ELF strtab.  We want
316// to sort this so that when one string is a suffix of another, we
317// always see the shorter string immediately after the longer string.
318// For example, we want to see these strings in this order:
319//   abcd
320//   cd
321//   d
322// When strings are not suffixes, we don't care what order they are
323// in, but we need to ensure that suffixes wind up next to each other.
324// So we do a reversed lexicographic sort on the reversed string.
325
326template<typename Stringpool_char>
327bool
328Stringpool_template<Stringpool_char>::Stringpool_sort_comparison::operator()(
329  const Stringpool_sort_info& sort_info1,
330  const Stringpool_sort_info& sort_info2) const
331{
332  const Hashkey& h1(sort_info1->first);
333  const Hashkey& h2(sort_info2->first);
334  const Stringpool_char* s1 = h1.string;
335  const Stringpool_char* s2 = h2.string;
336  const size_t len1 = h1.length;
337  const size_t len2 = h2.length;
338  const size_t minlen = len1 < len2 ? len1 : len2;
339  const Stringpool_char* p1 = s1 + len1 - 1;
340  const Stringpool_char* p2 = s2 + len2 - 1;
341  for (size_t i = minlen; i > 0; --i, --p1, --p2)
342    {
343      if (*p1 != *p2)
344	return *p1 > *p2;
345    }
346  return len1 > len2;
347}
348
349// Return whether s1 is a suffix of s2.
350
351template<typename Stringpool_char>
352bool
353Stringpool_template<Stringpool_char>::is_suffix(const Stringpool_char* s1,
354                                                size_t len1,
355						const Stringpool_char* s2,
356                                                size_t len2)
357{
358  if (len1 > len2)
359    return false;
360  return memcmp(s1, s2 + len2 - len1, len1 * sizeof(Stringpool_char)) == 0;
361}
362
363// Turn the stringpool into an ELF strtab: determine the offsets of
364// each string in the table.
365
366template<typename Stringpool_char>
367void
368Stringpool_template<Stringpool_char>::set_string_offsets()
369{
370  if (this->strtab_size_ != 0)
371    {
372      // We've already computed the offsets.
373      return;
374    }
375
376  const size_t charsize = sizeof(Stringpool_char);
377
378  // Offset 0 may be reserved for the empty string.
379  section_offset_type offset = this->zero_null_ ? charsize : 0;
380
381  // Sorting to find suffixes can take over 25% of the total CPU time
382  // used by the linker.  Since it's merely an optimization to reduce
383  // the strtab size, and gives a relatively small benefit (it's
384  // typically rare for a symbol to be a suffix of another), we only
385  // take the time to sort when the user asks for heavy optimization.
386  if (!this->optimize_)
387    {
388      // If we are not optimizing, the offsets are already assigned.
389      offset = this->offset_;
390    }
391  else
392    {
393      size_t count = this->string_set_.size();
394
395      std::vector<Stringpool_sort_info> v;
396      v.reserve(count);
397
398      for (typename String_set_type::iterator p = this->string_set_.begin();
399           p != this->string_set_.end();
400           ++p)
401        v.push_back(Stringpool_sort_info(p));
402
403      std::sort(v.begin(), v.end(), Stringpool_sort_comparison());
404
405      section_offset_type last_offset = -1;
406      for (typename std::vector<Stringpool_sort_info>::iterator last = v.end(),
407             curr = v.begin();
408           curr != v.end();
409           last = curr++)
410        {
411	  section_offset_type this_offset;
412          if (this->zero_null_ && (*curr)->first.string[0] == 0)
413            this_offset = 0;
414          else if (last != v.end()
415                   && is_suffix((*curr)->first.string,
416				(*curr)->first.length,
417                                (*last)->first.string,
418				(*last)->first.length))
419            this_offset = (last_offset
420			   + (((*last)->first.length - (*curr)->first.length)
421			      * charsize));
422          else
423            {
424              this_offset = offset;
425              offset += ((*curr)->first.length + 1) * charsize;
426            }
427	  this->key_to_offset_[(*curr)->second - 1] = this_offset;
428	  last_offset = this_offset;
429        }
430    }
431
432  this->strtab_size_ = offset;
433}
434
435// Get the offset of a string in the ELF strtab.  The string must
436// exist.
437
438template<typename Stringpool_char>
439section_offset_type
440Stringpool_template<Stringpool_char>::get_offset(const Stringpool_char* s)
441  const
442{
443  return this->get_offset_with_length(s, string_length(s));
444}
445
446template<typename Stringpool_char>
447section_offset_type
448Stringpool_template<Stringpool_char>::get_offset_with_length(
449    const Stringpool_char* s,
450    size_t length) const
451{
452  gold_assert(this->strtab_size_ != 0);
453  Hashkey hk(s, length);
454  typename String_set_type::const_iterator p = this->string_set_.find(hk);
455  if (p != this->string_set_.end())
456    return this->key_to_offset_[p->second - 1];
457  gold_unreachable();
458}
459
460// Write the ELF strtab into the buffer.
461
462template<typename Stringpool_char>
463void
464Stringpool_template<Stringpool_char>::write_to_buffer(
465    unsigned char* buffer,
466    section_size_type bufsize)
467{
468  gold_assert(this->strtab_size_ != 0);
469  gold_assert(bufsize >= this->strtab_size_);
470  if (this->zero_null_)
471    buffer[0] = '\0';
472  for (typename String_set_type::const_iterator p = this->string_set_.begin();
473       p != this->string_set_.end();
474       ++p)
475    {
476      const int len = (p->first.length + 1) * sizeof(Stringpool_char);
477      const section_offset_type offset = this->key_to_offset_[p->second - 1];
478      gold_assert(static_cast<section_size_type>(offset) + len
479		  <= this->strtab_size_);
480      memcpy(buffer + offset, p->first.string, len);
481    }
482}
483
484// Write the ELF strtab into the output file at the specified offset.
485
486template<typename Stringpool_char>
487void
488Stringpool_template<Stringpool_char>::write(Output_file* of, off_t offset)
489{
490  gold_assert(this->strtab_size_ != 0);
491  unsigned char* view = of->get_output_view(offset, this->strtab_size_);
492  this->write_to_buffer(view, this->strtab_size_);
493  of->write_output_view(offset, this->strtab_size_, view);
494}
495
496// Print statistical information to stderr.  This is used for --stats.
497
498template<typename Stringpool_char>
499void
500Stringpool_template<Stringpool_char>::print_stats(const char* name) const
501{
502#if defined(HAVE_TR1_UNORDERED_MAP) || defined(HAVE_EXT_HASH_MAP)
503  fprintf(stderr, _("%s: %s entries: %zu; buckets: %zu\n"),
504	  program_name, name, this->string_set_.size(),
505	  this->string_set_.bucket_count());
506#else
507  fprintf(stderr, _("%s: %s entries: %zu\n"),
508	  program_name, name, this->table_.size());
509#endif
510  fprintf(stderr, _("%s: %s Stringdata structures: %zu\n"),
511	  program_name, name, this->strings_.size());
512}
513
514// Instantiate the templates we need.
515
516template
517class Stringpool_template<char>;
518
519template
520class Stringpool_template<uint16_t>;
521
522template
523class Stringpool_template<uint32_t>;
524
525} // End namespace gold.
526