1/* hash.c -- hash table routines for BFD
2   Copyright 1993, 1994, 1995, 1997, 1999, 2001, 2002, 2003, 2004, 2005,
3   2006 Free Software Foundation, Inc.
4   Written by Steve Chamberlain <sac@cygnus.com>
5
6   This file is part of BFD, the Binary File Descriptor library.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
21
22#include "bfd.h"
23#include "sysdep.h"
24#include "libbfd.h"
25#include "objalloc.h"
26#include "libiberty.h"
27
28/*
29SECTION
30	Hash Tables
31
32@cindex Hash tables
33	BFD provides a simple set of hash table functions.  Routines
34	are provided to initialize a hash table, to free a hash table,
35	to look up a string in a hash table and optionally create an
36	entry for it, and to traverse a hash table.  There is
37	currently no routine to delete an string from a hash table.
38
39	The basic hash table does not permit any data to be stored
40	with a string.  However, a hash table is designed to present a
41	base class from which other types of hash tables may be
42	derived.  These derived types may store additional information
43	with the string.  Hash tables were implemented in this way,
44	rather than simply providing a data pointer in a hash table
45	entry, because they were designed for use by the linker back
46	ends.  The linker may create thousands of hash table entries,
47	and the overhead of allocating private data and storing and
48	following pointers becomes noticeable.
49
50	The basic hash table code is in <<hash.c>>.
51
52@menu
53@* Creating and Freeing a Hash Table::
54@* Looking Up or Entering a String::
55@* Traversing a Hash Table::
56@* Deriving a New Hash Table Type::
57@end menu
58
59INODE
60Creating and Freeing a Hash Table, Looking Up or Entering a String, Hash Tables, Hash Tables
61SUBSECTION
62	Creating and freeing a hash table
63
64@findex bfd_hash_table_init
65@findex bfd_hash_table_init_n
66	To create a hash table, create an instance of a <<struct
67	bfd_hash_table>> (defined in <<bfd.h>>) and call
68	<<bfd_hash_table_init>> (if you know approximately how many
69	entries you will need, the function <<bfd_hash_table_init_n>>,
70	which takes a @var{size} argument, may be used).
71	<<bfd_hash_table_init>> returns <<FALSE>> if some sort of
72	error occurs.
73
74@findex bfd_hash_newfunc
75	The function <<bfd_hash_table_init>> take as an argument a
76	function to use to create new entries.  For a basic hash
77	table, use the function <<bfd_hash_newfunc>>.  @xref{Deriving
78	a New Hash Table Type}, for why you would want to use a
79	different value for this argument.
80
81@findex bfd_hash_allocate
82	<<bfd_hash_table_init>> will create an objalloc which will be
83	used to allocate new entries.  You may allocate memory on this
84	objalloc using <<bfd_hash_allocate>>.
85
86@findex bfd_hash_table_free
87	Use <<bfd_hash_table_free>> to free up all the memory that has
88	been allocated for a hash table.  This will not free up the
89	<<struct bfd_hash_table>> itself, which you must provide.
90
91@findex bfd_hash_set_default_size
92	Use <<bfd_hash_set_default_size>> to set the default size of
93	hash table to use.
94
95INODE
96Looking Up or Entering a String, Traversing a Hash Table, Creating and Freeing a Hash Table, Hash Tables
97SUBSECTION
98	Looking up or entering a string
99
100@findex bfd_hash_lookup
101	The function <<bfd_hash_lookup>> is used both to look up a
102	string in the hash table and to create a new entry.
103
104	If the @var{create} argument is <<FALSE>>, <<bfd_hash_lookup>>
105	will look up a string.  If the string is found, it will
106	returns a pointer to a <<struct bfd_hash_entry>>.  If the
107	string is not found in the table <<bfd_hash_lookup>> will
108	return <<NULL>>.  You should not modify any of the fields in
109	the returns <<struct bfd_hash_entry>>.
110
111	If the @var{create} argument is <<TRUE>>, the string will be
112	entered into the hash table if it is not already there.
113	Either way a pointer to a <<struct bfd_hash_entry>> will be
114	returned, either to the existing structure or to a newly
115	created one.  In this case, a <<NULL>> return means that an
116	error occurred.
117
118	If the @var{create} argument is <<TRUE>>, and a new entry is
119	created, the @var{copy} argument is used to decide whether to
120	copy the string onto the hash table objalloc or not.  If
121	@var{copy} is passed as <<FALSE>>, you must be careful not to
122	deallocate or modify the string as long as the hash table
123	exists.
124
125INODE
126Traversing a Hash Table, Deriving a New Hash Table Type, Looking Up or Entering a String, Hash Tables
127SUBSECTION
128	Traversing a hash table
129
130@findex bfd_hash_traverse
131	The function <<bfd_hash_traverse>> may be used to traverse a
132	hash table, calling a function on each element.  The traversal
133	is done in a random order.
134
135	<<bfd_hash_traverse>> takes as arguments a function and a
136	generic <<void *>> pointer.  The function is called with a
137	hash table entry (a <<struct bfd_hash_entry *>>) and the
138	generic pointer passed to <<bfd_hash_traverse>>.  The function
139	must return a <<boolean>> value, which indicates whether to
140	continue traversing the hash table.  If the function returns
141	<<FALSE>>, <<bfd_hash_traverse>> will stop the traversal and
142	return immediately.
143
144INODE
145Deriving a New Hash Table Type, , Traversing a Hash Table, Hash Tables
146SUBSECTION
147	Deriving a new hash table type
148
149	Many uses of hash tables want to store additional information
150	which each entry in the hash table.  Some also find it
151	convenient to store additional information with the hash table
152	itself.  This may be done using a derived hash table.
153
154	Since C is not an object oriented language, creating a derived
155	hash table requires sticking together some boilerplate
156	routines with a few differences specific to the type of hash
157	table you want to create.
158
159	An example of a derived hash table is the linker hash table.
160	The structures for this are defined in <<bfdlink.h>>.  The
161	functions are in <<linker.c>>.
162
163	You may also derive a hash table from an already derived hash
164	table.  For example, the a.out linker backend code uses a hash
165	table derived from the linker hash table.
166
167@menu
168@* Define the Derived Structures::
169@* Write the Derived Creation Routine::
170@* Write Other Derived Routines::
171@end menu
172
173INODE
174Define the Derived Structures, Write the Derived Creation Routine, Deriving a New Hash Table Type, Deriving a New Hash Table Type
175SUBSUBSECTION
176	Define the derived structures
177
178	You must define a structure for an entry in the hash table,
179	and a structure for the hash table itself.
180
181	The first field in the structure for an entry in the hash
182	table must be of the type used for an entry in the hash table
183	you are deriving from.  If you are deriving from a basic hash
184	table this is <<struct bfd_hash_entry>>, which is defined in
185	<<bfd.h>>.  The first field in the structure for the hash
186	table itself must be of the type of the hash table you are
187	deriving from itself.  If you are deriving from a basic hash
188	table, this is <<struct bfd_hash_table>>.
189
190	For example, the linker hash table defines <<struct
191	bfd_link_hash_entry>> (in <<bfdlink.h>>).  The first field,
192	<<root>>, is of type <<struct bfd_hash_entry>>.  Similarly,
193	the first field in <<struct bfd_link_hash_table>>, <<table>>,
194	is of type <<struct bfd_hash_table>>.
195
196INODE
197Write the Derived Creation Routine, Write Other Derived Routines, Define the Derived Structures, Deriving a New Hash Table Type
198SUBSUBSECTION
199	Write the derived creation routine
200
201	You must write a routine which will create and initialize an
202	entry in the hash table.  This routine is passed as the
203	function argument to <<bfd_hash_table_init>>.
204
205	In order to permit other hash tables to be derived from the
206	hash table you are creating, this routine must be written in a
207	standard way.
208
209	The first argument to the creation routine is a pointer to a
210	hash table entry.  This may be <<NULL>>, in which case the
211	routine should allocate the right amount of space.  Otherwise
212	the space has already been allocated by a hash table type
213	derived from this one.
214
215	After allocating space, the creation routine must call the
216	creation routine of the hash table type it is derived from,
217	passing in a pointer to the space it just allocated.  This
218	will initialize any fields used by the base hash table.
219
220	Finally the creation routine must initialize any local fields
221	for the new hash table type.
222
223	Here is a boilerplate example of a creation routine.
224	@var{function_name} is the name of the routine.
225	@var{entry_type} is the type of an entry in the hash table you
226	are creating.  @var{base_newfunc} is the name of the creation
227	routine of the hash table type your hash table is derived
228	from.
229
230EXAMPLE
231
232.struct bfd_hash_entry *
233.@var{function_name} (struct bfd_hash_entry *entry,
234.                     struct bfd_hash_table *table,
235.                     const char *string)
236.{
237.  struct @var{entry_type} *ret = (@var{entry_type} *) entry;
238.
239. {* Allocate the structure if it has not already been allocated by a
240.    derived class.  *}
241.  if (ret == NULL)
242.    {
243.      ret = bfd_hash_allocate (table, sizeof (* ret));
244.      if (ret == NULL)
245.        return NULL;
246.    }
247.
248. {* Call the allocation method of the base class.  *}
249.  ret = ((@var{entry_type} *)
250.	 @var{base_newfunc} ((struct bfd_hash_entry *) ret, table, string));
251.
252. {* Initialize the local fields here.  *}
253.
254.  return (struct bfd_hash_entry *) ret;
255.}
256
257DESCRIPTION
258	The creation routine for the linker hash table, which is in
259	<<linker.c>>, looks just like this example.
260	@var{function_name} is <<_bfd_link_hash_newfunc>>.
261	@var{entry_type} is <<struct bfd_link_hash_entry>>.
262	@var{base_newfunc} is <<bfd_hash_newfunc>>, the creation
263	routine for a basic hash table.
264
265	<<_bfd_link_hash_newfunc>> also initializes the local fields
266	in a linker hash table entry: <<type>>, <<written>> and
267	<<next>>.
268
269INODE
270Write Other Derived Routines, , Write the Derived Creation Routine, Deriving a New Hash Table Type
271SUBSUBSECTION
272	Write other derived routines
273
274	You will want to write other routines for your new hash table,
275	as well.
276
277	You will want an initialization routine which calls the
278	initialization routine of the hash table you are deriving from
279	and initializes any other local fields.  For the linker hash
280	table, this is <<_bfd_link_hash_table_init>> in <<linker.c>>.
281
282	You will want a lookup routine which calls the lookup routine
283	of the hash table you are deriving from and casts the result.
284	The linker hash table uses <<bfd_link_hash_lookup>> in
285	<<linker.c>> (this actually takes an additional argument which
286	it uses to decide how to return the looked up value).
287
288	You may want a traversal routine.  This should just call the
289	traversal routine of the hash table you are deriving from with
290	appropriate casts.  The linker hash table uses
291	<<bfd_link_hash_traverse>> in <<linker.c>>.
292
293	These routines may simply be defined as macros.  For example,
294	the a.out backend linker hash table, which is derived from the
295	linker hash table, uses macros for the lookup and traversal
296	routines.  These are <<aout_link_hash_lookup>> and
297	<<aout_link_hash_traverse>> in aoutx.h.
298*/
299
300/* The default number of entries to use when creating a hash table.  */
301#define DEFAULT_SIZE 4051
302static size_t bfd_default_hash_table_size = DEFAULT_SIZE;
303
304/* Create a new hash table, given a number of entries.  */
305
306bfd_boolean
307bfd_hash_table_init_n (struct bfd_hash_table *table,
308		       struct bfd_hash_entry *(*newfunc) (struct bfd_hash_entry *,
309							  struct bfd_hash_table *,
310							  const char *),
311		       unsigned int entsize,
312		       unsigned int size)
313{
314  unsigned int alloc;
315
316  alloc = size * sizeof (struct bfd_hash_entry *);
317
318  table->memory = (void *) objalloc_create ();
319  if (table->memory == NULL)
320    {
321      bfd_set_error (bfd_error_no_memory);
322      return FALSE;
323    }
324  table->table = objalloc_alloc ((struct objalloc *) table->memory, alloc);
325  if (table->table == NULL)
326    {
327      bfd_set_error (bfd_error_no_memory);
328      return FALSE;
329    }
330  memset ((void *) table->table, 0, alloc);
331  table->size = size;
332  table->entsize = entsize;
333  table->newfunc = newfunc;
334  return TRUE;
335}
336
337/* Create a new hash table with the default number of entries.  */
338
339bfd_boolean
340bfd_hash_table_init (struct bfd_hash_table *table,
341		     struct bfd_hash_entry *(*newfunc) (struct bfd_hash_entry *,
342							struct bfd_hash_table *,
343							const char *),
344		     unsigned int entsize)
345{
346  return bfd_hash_table_init_n (table, newfunc, entsize,
347				bfd_default_hash_table_size);
348}
349
350/* Free a hash table.  */
351
352void
353bfd_hash_table_free (struct bfd_hash_table *table)
354{
355  objalloc_free (table->memory);
356  table->memory = NULL;
357}
358
359/* Look up a string in a hash table.  */
360
361struct bfd_hash_entry *
362bfd_hash_lookup (struct bfd_hash_table *table,
363		 const char *string,
364		 bfd_boolean create,
365		 bfd_boolean copy)
366{
367  const unsigned char *s;
368  unsigned long hash;
369  unsigned int c;
370  struct bfd_hash_entry *hashp;
371  unsigned int len;
372  unsigned int index;
373
374  hash = 0;
375  len = 0;
376  s = (const unsigned char *) string;
377  while ((c = *s++) != '\0')
378    {
379      hash += c + (c << 17);
380      hash ^= hash >> 2;
381    }
382  len = (s - (const unsigned char *) string) - 1;
383  hash += len + (len << 17);
384  hash ^= hash >> 2;
385
386  index = hash % table->size;
387  for (hashp = table->table[index];
388       hashp != NULL;
389       hashp = hashp->next)
390    {
391      if (hashp->hash == hash
392	  && strcmp (hashp->string, string) == 0)
393	return hashp;
394    }
395
396  if (! create)
397    return NULL;
398
399  hashp = (*table->newfunc) (NULL, table, string);
400  if (hashp == NULL)
401    return NULL;
402  if (copy)
403    {
404      char *new;
405
406      new = objalloc_alloc ((struct objalloc *) table->memory, len + 1);
407      if (!new)
408	{
409	  bfd_set_error (bfd_error_no_memory);
410	  return NULL;
411	}
412      memcpy (new, string, len + 1);
413      string = new;
414    }
415  hashp->string = string;
416  hashp->hash = hash;
417  hashp->next = table->table[index];
418  table->table[index] = hashp;
419
420  return hashp;
421}
422
423/* Replace an entry in a hash table.  */
424
425void
426bfd_hash_replace (struct bfd_hash_table *table,
427		  struct bfd_hash_entry *old,
428		  struct bfd_hash_entry *nw)
429{
430  unsigned int index;
431  struct bfd_hash_entry **pph;
432
433  index = old->hash % table->size;
434  for (pph = &table->table[index];
435       (*pph) != NULL;
436       pph = &(*pph)->next)
437    {
438      if (*pph == old)
439	{
440	  *pph = nw;
441	  return;
442	}
443    }
444
445  abort ();
446}
447
448/* Allocate space in a hash table.  */
449
450void *
451bfd_hash_allocate (struct bfd_hash_table *table,
452		   unsigned int size)
453{
454  void * ret;
455
456  ret = objalloc_alloc ((struct objalloc *) table->memory, size);
457  if (ret == NULL && size != 0)
458    bfd_set_error (bfd_error_no_memory);
459  return ret;
460}
461
462/* Base method for creating a new hash table entry.  */
463
464struct bfd_hash_entry *
465bfd_hash_newfunc (struct bfd_hash_entry *entry,
466		  struct bfd_hash_table *table,
467		  const char *string ATTRIBUTE_UNUSED)
468{
469  if (entry == NULL)
470    entry = bfd_hash_allocate (table, sizeof (* entry));
471  return entry;
472}
473
474/* Traverse a hash table.  */
475
476void
477bfd_hash_traverse (struct bfd_hash_table *table,
478		   bfd_boolean (*func) (struct bfd_hash_entry *, void *),
479		   void * info)
480{
481  unsigned int i;
482
483  for (i = 0; i < table->size; i++)
484    {
485      struct bfd_hash_entry *p;
486
487      for (p = table->table[i]; p != NULL; p = p->next)
488	if (! (*func) (p, info))
489	  return;
490    }
491}
492
493void
494bfd_hash_set_default_size (bfd_size_type hash_size)
495{
496  /* Extend this prime list if you want more granularity of hash table size.  */
497  static const bfd_size_type hash_size_primes[] =
498    {
499      251, 509, 1021, 2039, 4051, 8599, 16699, 32749
500    };
501  size_t index;
502
503  /* Work out best prime number near the hash_size.  */
504  for (index = 0; index < ARRAY_SIZE (hash_size_primes) - 1; ++index)
505    if (hash_size <= hash_size_primes[index])
506      break;
507
508  bfd_default_hash_table_size = hash_size_primes[index];
509}
510
511/* A few different object file formats (a.out, COFF, ELF) use a string
512   table.  These functions support adding strings to a string table,
513   returning the byte offset, and writing out the table.
514
515   Possible improvements:
516   + look for strings matching trailing substrings of other strings
517   + better data structures?  balanced trees?
518   + look at reducing memory use elsewhere -- maybe if we didn't have
519     to construct the entire symbol table at once, we could get by
520     with smaller amounts of VM?  (What effect does that have on the
521     string table reductions?)  */
522
523/* An entry in the strtab hash table.  */
524
525struct strtab_hash_entry
526{
527  struct bfd_hash_entry root;
528  /* Index in string table.  */
529  bfd_size_type index;
530  /* Next string in strtab.  */
531  struct strtab_hash_entry *next;
532};
533
534/* The strtab hash table.  */
535
536struct bfd_strtab_hash
537{
538  struct bfd_hash_table table;
539  /* Size of strtab--also next available index.  */
540  bfd_size_type size;
541  /* First string in strtab.  */
542  struct strtab_hash_entry *first;
543  /* Last string in strtab.  */
544  struct strtab_hash_entry *last;
545  /* Whether to precede strings with a two byte length, as in the
546     XCOFF .debug section.  */
547  bfd_boolean xcoff;
548};
549
550/* Routine to create an entry in a strtab.  */
551
552static struct bfd_hash_entry *
553strtab_hash_newfunc (struct bfd_hash_entry *entry,
554		     struct bfd_hash_table *table,
555		     const char *string)
556{
557  struct strtab_hash_entry *ret = (struct strtab_hash_entry *) entry;
558
559  /* Allocate the structure if it has not already been allocated by a
560     subclass.  */
561  if (ret == NULL)
562    ret = bfd_hash_allocate (table, sizeof (* ret));
563  if (ret == NULL)
564    return NULL;
565
566  /* Call the allocation method of the superclass.  */
567  ret = (struct strtab_hash_entry *)
568	 bfd_hash_newfunc ((struct bfd_hash_entry *) ret, table, string);
569
570  if (ret)
571    {
572      /* Initialize the local fields.  */
573      ret->index = (bfd_size_type) -1;
574      ret->next = NULL;
575    }
576
577  return (struct bfd_hash_entry *) ret;
578}
579
580/* Look up an entry in an strtab.  */
581
582#define strtab_hash_lookup(t, string, create, copy) \
583  ((struct strtab_hash_entry *) \
584   bfd_hash_lookup (&(t)->table, (string), (create), (copy)))
585
586/* Create a new strtab.  */
587
588struct bfd_strtab_hash *
589_bfd_stringtab_init (void)
590{
591  struct bfd_strtab_hash *table;
592  bfd_size_type amt = sizeof (* table);
593
594  table = bfd_malloc (amt);
595  if (table == NULL)
596    return NULL;
597
598  if (!bfd_hash_table_init (&table->table, strtab_hash_newfunc,
599			    sizeof (struct strtab_hash_entry)))
600    {
601      free (table);
602      return NULL;
603    }
604
605  table->size = 0;
606  table->first = NULL;
607  table->last = NULL;
608  table->xcoff = FALSE;
609
610  return table;
611}
612
613/* Create a new strtab in which the strings are output in the format
614   used in the XCOFF .debug section: a two byte length precedes each
615   string.  */
616
617struct bfd_strtab_hash *
618_bfd_xcoff_stringtab_init (void)
619{
620  struct bfd_strtab_hash *ret;
621
622  ret = _bfd_stringtab_init ();
623  if (ret != NULL)
624    ret->xcoff = TRUE;
625  return ret;
626}
627
628/* Free a strtab.  */
629
630void
631_bfd_stringtab_free (struct bfd_strtab_hash *table)
632{
633  bfd_hash_table_free (&table->table);
634  free (table);
635}
636
637/* Get the index of a string in a strtab, adding it if it is not
638   already present.  If HASH is FALSE, we don't really use the hash
639   table, and we don't eliminate duplicate strings.  */
640
641bfd_size_type
642_bfd_stringtab_add (struct bfd_strtab_hash *tab,
643		    const char *str,
644		    bfd_boolean hash,
645		    bfd_boolean copy)
646{
647  struct strtab_hash_entry *entry;
648
649  if (hash)
650    {
651      entry = strtab_hash_lookup (tab, str, TRUE, copy);
652      if (entry == NULL)
653	return (bfd_size_type) -1;
654    }
655  else
656    {
657      entry = bfd_hash_allocate (&tab->table, sizeof (* entry));
658      if (entry == NULL)
659	return (bfd_size_type) -1;
660      if (! copy)
661	entry->root.string = str;
662      else
663	{
664	  char *n;
665
666	  n = bfd_hash_allocate (&tab->table, strlen (str) + 1);
667	  if (n == NULL)
668	    return (bfd_size_type) -1;
669	  entry->root.string = n;
670	}
671      entry->index = (bfd_size_type) -1;
672      entry->next = NULL;
673    }
674
675  if (entry->index == (bfd_size_type) -1)
676    {
677      entry->index = tab->size;
678      tab->size += strlen (str) + 1;
679      if (tab->xcoff)
680	{
681	  entry->index += 2;
682	  tab->size += 2;
683	}
684      if (tab->first == NULL)
685	tab->first = entry;
686      else
687	tab->last->next = entry;
688      tab->last = entry;
689    }
690
691  return entry->index;
692}
693
694/* Get the number of bytes in a strtab.  */
695
696bfd_size_type
697_bfd_stringtab_size (struct bfd_strtab_hash *tab)
698{
699  return tab->size;
700}
701
702/* Write out a strtab.  ABFD must already be at the right location in
703   the file.  */
704
705bfd_boolean
706_bfd_stringtab_emit (bfd *abfd, struct bfd_strtab_hash *tab)
707{
708  bfd_boolean xcoff;
709  struct strtab_hash_entry *entry;
710
711  xcoff = tab->xcoff;
712
713  for (entry = tab->first; entry != NULL; entry = entry->next)
714    {
715      const char *str;
716      size_t len;
717
718      str = entry->root.string;
719      len = strlen (str) + 1;
720
721      if (xcoff)
722	{
723	  bfd_byte buf[2];
724
725	  /* The output length includes the null byte.  */
726	  bfd_put_16 (abfd, (bfd_vma) len, buf);
727	  if (bfd_bwrite ((void *) buf, (bfd_size_type) 2, abfd) != 2)
728	    return FALSE;
729	}
730
731      if (bfd_bwrite ((void *) str, (bfd_size_type) len, abfd) != len)
732	return FALSE;
733    }
734
735  return TRUE;
736}
737