hash.c revision 89857
1/* hash.c -- hash table routines for BFD
2   Copyright 1993, 1994, 1995, 1997, 1999, 2001
3   Free Software Foundation, Inc.
4   Written by Steve Chamberlain <sac@cygnus.com>
5
6This file is part of BFD, the Binary File Descriptor library.
7
8This program is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2 of the License, or
11(at your option) any later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
21
22#include "bfd.h"
23#include "sysdep.h"
24#include "libbfd.h"
25#include "objalloc.h"
26
27/*
28SECTION
29	Hash Tables
30
31@cindex Hash tables
32	BFD provides a simple set of hash table functions.  Routines
33	are provided to initialize a hash table, to free a hash table,
34	to look up a string in a hash table and optionally create an
35	entry for it, and to traverse a hash table.  There is
36	currently no routine to delete an string from a hash table.
37
38	The basic hash table does not permit any data to be stored
39	with a string.  However, a hash table is designed to present a
40	base class from which other types of hash tables may be
41	derived.  These derived types may store additional information
42	with the string.  Hash tables were implemented in this way,
43	rather than simply providing a data pointer in a hash table
44	entry, because they were designed for use by the linker back
45	ends.  The linker may create thousands of hash table entries,
46	and the overhead of allocating private data and storing and
47	following pointers becomes noticeable.
48
49	The basic hash table code is in <<hash.c>>.
50
51@menu
52@* Creating and Freeing a Hash Table::
53@* Looking Up or Entering a String::
54@* Traversing a Hash Table::
55@* Deriving a New Hash Table Type::
56@end menu
57
58INODE
59Creating and Freeing a Hash Table, Looking Up or Entering a String, Hash Tables, Hash Tables
60SUBSECTION
61	Creating and freeing a hash table
62
63@findex bfd_hash_table_init
64@findex bfd_hash_table_init_n
65	To create a hash table, create an instance of a <<struct
66	bfd_hash_table>> (defined in <<bfd.h>>) and call
67	<<bfd_hash_table_init>> (if you know approximately how many
68	entries you will need, the function <<bfd_hash_table_init_n>>,
69	which takes a @var{size} argument, may be used).
70	<<bfd_hash_table_init>> returns <<false>> if some sort of
71	error occurs.
72
73@findex bfd_hash_newfunc
74	The function <<bfd_hash_table_init>> take as an argument a
75	function to use to create new entries.  For a basic hash
76	table, use the function <<bfd_hash_newfunc>>.  @xref{Deriving
77	a New Hash Table Type}, for why you would want to use a
78	different value for this argument.
79
80@findex bfd_hash_allocate
81	<<bfd_hash_table_init>> will create an objalloc which will be
82	used to allocate new entries.  You may allocate memory on this
83	objalloc using <<bfd_hash_allocate>>.
84
85@findex bfd_hash_table_free
86	Use <<bfd_hash_table_free>> to free up all the memory that has
87	been allocated for a hash table.  This will not free up the
88	<<struct bfd_hash_table>> itself, which you must provide.
89
90INODE
91Looking Up or Entering a String, Traversing a Hash Table, Creating and Freeing a Hash Table, Hash Tables
92SUBSECTION
93	Looking up or entering a string
94
95@findex bfd_hash_lookup
96	The function <<bfd_hash_lookup>> is used both to look up a
97	string in the hash table and to create a new entry.
98
99	If the @var{create} argument is <<false>>, <<bfd_hash_lookup>>
100	will look up a string.  If the string is found, it will
101	returns a pointer to a <<struct bfd_hash_entry>>.  If the
102	string is not found in the table <<bfd_hash_lookup>> will
103	return <<NULL>>.  You should not modify any of the fields in
104	the returns <<struct bfd_hash_entry>>.
105
106	If the @var{create} argument is <<true>>, the string will be
107	entered into the hash table if it is not already there.
108	Either way a pointer to a <<struct bfd_hash_entry>> will be
109	returned, either to the existing structure or to a newly
110	created one.  In this case, a <<NULL>> return means that an
111	error occurred.
112
113	If the @var{create} argument is <<true>>, and a new entry is
114	created, the @var{copy} argument is used to decide whether to
115	copy the string onto the hash table objalloc or not.  If
116	@var{copy} is passed as <<false>>, you must be careful not to
117	deallocate or modify the string as long as the hash table
118	exists.
119
120INODE
121Traversing a Hash Table, Deriving a New Hash Table Type, Looking Up or Entering a String, Hash Tables
122SUBSECTION
123	Traversing a hash table
124
125@findex bfd_hash_traverse
126	The function <<bfd_hash_traverse>> may be used to traverse a
127	hash table, calling a function on each element.  The traversal
128	is done in a random order.
129
130	<<bfd_hash_traverse>> takes as arguments a function and a
131	generic <<void *>> pointer.  The function is called with a
132	hash table entry (a <<struct bfd_hash_entry *>>) and the
133	generic pointer passed to <<bfd_hash_traverse>>.  The function
134	must return a <<boolean>> value, which indicates whether to
135	continue traversing the hash table.  If the function returns
136	<<false>>, <<bfd_hash_traverse>> will stop the traversal and
137	return immediately.
138
139INODE
140Deriving a New Hash Table Type, , Traversing a Hash Table, Hash Tables
141SUBSECTION
142	Deriving a new hash table type
143
144	Many uses of hash tables want to store additional information
145	which each entry in the hash table.  Some also find it
146	convenient to store additional information with the hash table
147	itself.  This may be done using a derived hash table.
148
149	Since C is not an object oriented language, creating a derived
150	hash table requires sticking together some boilerplate
151	routines with a few differences specific to the type of hash
152	table you want to create.
153
154	An example of a derived hash table is the linker hash table.
155	The structures for this are defined in <<bfdlink.h>>.  The
156	functions are in <<linker.c>>.
157
158	You may also derive a hash table from an already derived hash
159	table.  For example, the a.out linker backend code uses a hash
160	table derived from the linker hash table.
161
162@menu
163@* Define the Derived Structures::
164@* Write the Derived Creation Routine::
165@* Write Other Derived Routines::
166@end menu
167
168INODE
169Define the Derived Structures, Write the Derived Creation Routine, Deriving a New Hash Table Type, Deriving a New Hash Table Type
170SUBSUBSECTION
171	Define the derived structures
172
173	You must define a structure for an entry in the hash table,
174	and a structure for the hash table itself.
175
176	The first field in the structure for an entry in the hash
177	table must be of the type used for an entry in the hash table
178	you are deriving from.  If you are deriving from a basic hash
179	table this is <<struct bfd_hash_entry>>, which is defined in
180	<<bfd.h>>.  The first field in the structure for the hash
181	table itself must be of the type of the hash table you are
182	deriving from itself.  If you are deriving from a basic hash
183	table, this is <<struct bfd_hash_table>>.
184
185	For example, the linker hash table defines <<struct
186	bfd_link_hash_entry>> (in <<bfdlink.h>>).  The first field,
187	<<root>>, is of type <<struct bfd_hash_entry>>.  Similarly,
188	the first field in <<struct bfd_link_hash_table>>, <<table>>,
189	is of type <<struct bfd_hash_table>>.
190
191INODE
192Write the Derived Creation Routine, Write Other Derived Routines, Define the Derived Structures, Deriving a New Hash Table Type
193SUBSUBSECTION
194	Write the derived creation routine
195
196	You must write a routine which will create and initialize an
197	entry in the hash table.  This routine is passed as the
198	function argument to <<bfd_hash_table_init>>.
199
200	In order to permit other hash tables to be derived from the
201	hash table you are creating, this routine must be written in a
202	standard way.
203
204	The first argument to the creation routine is a pointer to a
205	hash table entry.  This may be <<NULL>>, in which case the
206	routine should allocate the right amount of space.  Otherwise
207	the space has already been allocated by a hash table type
208	derived from this one.
209
210	After allocating space, the creation routine must call the
211	creation routine of the hash table type it is derived from,
212	passing in a pointer to the space it just allocated.  This
213	will initialize any fields used by the base hash table.
214
215	Finally the creation routine must initialize any local fields
216	for the new hash table type.
217
218	Here is a boilerplate example of a creation routine.
219	@var{function_name} is the name of the routine.
220	@var{entry_type} is the type of an entry in the hash table you
221	are creating.  @var{base_newfunc} is the name of the creation
222	routine of the hash table type your hash table is derived
223	from.
224
225EXAMPLE
226
227.struct bfd_hash_entry *
228.@var{function_name} (entry, table, string)
229.     struct bfd_hash_entry *entry;
230.     struct bfd_hash_table *table;
231.     const char *string;
232.{
233.  struct @var{entry_type} *ret = (@var{entry_type} *) entry;
234.
235. {* Allocate the structure if it has not already been allocated by a
236.    derived class.  *}
237.  if (ret == (@var{entry_type} *) NULL)
238.    {
239.      ret = ((@var{entry_type} *)
240.	      bfd_hash_allocate (table, sizeof (@var{entry_type})));
241.      if (ret == (@var{entry_type} *) NULL)
242.        return NULL;
243.    }
244.
245. {* Call the allocation method of the base class.  *}
246.  ret = ((@var{entry_type} *)
247.	 @var{base_newfunc} ((struct bfd_hash_entry *) ret, table, string));
248.
249. {* Initialize the local fields here.  *}
250.
251.  return (struct bfd_hash_entry *) ret;
252.}
253
254DESCRIPTION
255	The creation routine for the linker hash table, which is in
256	<<linker.c>>, looks just like this example.
257	@var{function_name} is <<_bfd_link_hash_newfunc>>.
258	@var{entry_type} is <<struct bfd_link_hash_entry>>.
259	@var{base_newfunc} is <<bfd_hash_newfunc>>, the creation
260	routine for a basic hash table.
261
262	<<_bfd_link_hash_newfunc>> also initializes the local fields
263	in a linker hash table entry: <<type>>, <<written>> and
264	<<next>>.
265
266INODE
267Write Other Derived Routines, , Write the Derived Creation Routine, Deriving a New Hash Table Type
268SUBSUBSECTION
269	Write other derived routines
270
271	You will want to write other routines for your new hash table,
272	as well.
273
274	You will want an initialization routine which calls the
275	initialization routine of the hash table you are deriving from
276	and initializes any other local fields.  For the linker hash
277	table, this is <<_bfd_link_hash_table_init>> in <<linker.c>>.
278
279	You will want a lookup routine which calls the lookup routine
280	of the hash table you are deriving from and casts the result.
281	The linker hash table uses <<bfd_link_hash_lookup>> in
282	<<linker.c>> (this actually takes an additional argument which
283	it uses to decide how to return the looked up value).
284
285	You may want a traversal routine.  This should just call the
286	traversal routine of the hash table you are deriving from with
287	appropriate casts.  The linker hash table uses
288	<<bfd_link_hash_traverse>> in <<linker.c>>.
289
290	These routines may simply be defined as macros.  For example,
291	the a.out backend linker hash table, which is derived from the
292	linker hash table, uses macros for the lookup and traversal
293	routines.  These are <<aout_link_hash_lookup>> and
294	<<aout_link_hash_traverse>> in aoutx.h.
295*/
296
297/* The default number of entries to use when creating a hash table.  */
298#define DEFAULT_SIZE (4051)
299
300/* Create a new hash table, given a number of entries.  */
301
302boolean
303bfd_hash_table_init_n (table, newfunc, size)
304     struct bfd_hash_table *table;
305     struct bfd_hash_entry *(*newfunc) PARAMS ((struct bfd_hash_entry *,
306						struct bfd_hash_table *,
307						const char *));
308     unsigned int size;
309{
310  unsigned int alloc;
311
312  alloc = size * sizeof (struct bfd_hash_entry *);
313
314  table->memory = (PTR) objalloc_create ();
315  if (table->memory == NULL)
316    {
317      bfd_set_error (bfd_error_no_memory);
318      return false;
319    }
320  table->table = ((struct bfd_hash_entry **)
321		  objalloc_alloc ((struct objalloc *) table->memory, alloc));
322  if (table->table == NULL)
323    {
324      bfd_set_error (bfd_error_no_memory);
325      return false;
326    }
327  memset ((PTR) table->table, 0, alloc);
328  table->size = size;
329  table->newfunc = newfunc;
330  return true;
331}
332
333/* Create a new hash table with the default number of entries.  */
334
335boolean
336bfd_hash_table_init (table, newfunc)
337     struct bfd_hash_table *table;
338     struct bfd_hash_entry *(*newfunc) PARAMS ((struct bfd_hash_entry *,
339						struct bfd_hash_table *,
340						const char *));
341{
342  return bfd_hash_table_init_n (table, newfunc, DEFAULT_SIZE);
343}
344
345/* Free a hash table.  */
346
347void
348bfd_hash_table_free (table)
349     struct bfd_hash_table *table;
350{
351  objalloc_free ((struct objalloc *) table->memory);
352  table->memory = NULL;
353}
354
355/* Look up a string in a hash table.  */
356
357struct bfd_hash_entry *
358bfd_hash_lookup (table, string, create, copy)
359     struct bfd_hash_table *table;
360     const char *string;
361     boolean create;
362     boolean copy;
363{
364  register const unsigned char *s;
365  register unsigned long hash;
366  register unsigned int c;
367  struct bfd_hash_entry *hashp;
368  unsigned int len;
369  unsigned int index;
370
371  hash = 0;
372  len = 0;
373  s = (const unsigned char *) string;
374  while ((c = *s++) != '\0')
375    {
376      hash += c + (c << 17);
377      hash ^= hash >> 2;
378      ++len;
379    }
380  hash += len + (len << 17);
381  hash ^= hash >> 2;
382
383  index = hash % table->size;
384  for (hashp = table->table[index];
385       hashp != (struct bfd_hash_entry *) NULL;
386       hashp = hashp->next)
387    {
388      if (hashp->hash == hash
389	  && strcmp (hashp->string, string) == 0)
390	return hashp;
391    }
392
393  if (! create)
394    return (struct bfd_hash_entry *) NULL;
395
396  hashp = (*table->newfunc) ((struct bfd_hash_entry *) NULL, table, string);
397  if (hashp == (struct bfd_hash_entry *) NULL)
398    return (struct bfd_hash_entry *) NULL;
399  if (copy)
400    {
401      char *new;
402
403      new = (char *) objalloc_alloc ((struct objalloc *) table->memory,
404				     len + 1);
405      if (!new)
406	{
407	  bfd_set_error (bfd_error_no_memory);
408	  return (struct bfd_hash_entry *) NULL;
409	}
410      strcpy (new, string);
411      string = new;
412    }
413  hashp->string = string;
414  hashp->hash = hash;
415  hashp->next = table->table[index];
416  table->table[index] = hashp;
417
418  return hashp;
419}
420
421/* Replace an entry in a hash table.  */
422
423void
424bfd_hash_replace (table, old, nw)
425     struct bfd_hash_table *table;
426     struct bfd_hash_entry *old;
427     struct bfd_hash_entry *nw;
428{
429  unsigned int index;
430  struct bfd_hash_entry **pph;
431
432  index = old->hash % table->size;
433  for (pph = &table->table[index];
434       (*pph) != (struct bfd_hash_entry *) NULL;
435       pph = &(*pph)->next)
436    {
437      if (*pph == old)
438	{
439	  *pph = nw;
440	  return;
441	}
442    }
443
444  abort ();
445}
446
447/* Base method for creating a new hash table entry.  */
448
449/*ARGSUSED*/
450struct bfd_hash_entry *
451bfd_hash_newfunc (entry, table, string)
452     struct bfd_hash_entry *entry;
453     struct bfd_hash_table *table;
454     const char *string ATTRIBUTE_UNUSED;
455{
456  if (entry == (struct bfd_hash_entry *) NULL)
457    entry = ((struct bfd_hash_entry *)
458	     bfd_hash_allocate (table, sizeof (struct bfd_hash_entry)));
459  return entry;
460}
461
462/* Allocate space in a hash table.  */
463
464PTR
465bfd_hash_allocate (table, size)
466     struct bfd_hash_table *table;
467     unsigned int size;
468{
469  PTR ret;
470
471  ret = objalloc_alloc ((struct objalloc *) table->memory, size);
472  if (ret == NULL && size != 0)
473    bfd_set_error (bfd_error_no_memory);
474  return ret;
475}
476
477/* Traverse a hash table.  */
478
479void
480bfd_hash_traverse (table, func, info)
481     struct bfd_hash_table *table;
482     boolean (*func) PARAMS ((struct bfd_hash_entry *, PTR));
483     PTR info;
484{
485  unsigned int i;
486
487  for (i = 0; i < table->size; i++)
488    {
489      struct bfd_hash_entry *p;
490
491      for (p = table->table[i]; p != NULL; p = p->next)
492	{
493	  if (! (*func) (p, info))
494	    return;
495	}
496    }
497}
498
499/* A few different object file formats (a.out, COFF, ELF) use a string
500   table.  These functions support adding strings to a string table,
501   returning the byte offset, and writing out the table.
502
503   Possible improvements:
504   + look for strings matching trailing substrings of other strings
505   + better data structures?  balanced trees?
506   + look at reducing memory use elsewhere -- maybe if we didn't have
507     to construct the entire symbol table at once, we could get by
508     with smaller amounts of VM?  (What effect does that have on the
509     string table reductions?)  */
510
511/* An entry in the strtab hash table.  */
512
513struct strtab_hash_entry
514{
515  struct bfd_hash_entry root;
516  /* Index in string table.  */
517  bfd_size_type index;
518  /* Next string in strtab.  */
519  struct strtab_hash_entry *next;
520};
521
522/* The strtab hash table.  */
523
524struct bfd_strtab_hash
525{
526  struct bfd_hash_table table;
527  /* Size of strtab--also next available index.  */
528  bfd_size_type size;
529  /* First string in strtab.  */
530  struct strtab_hash_entry *first;
531  /* Last string in strtab.  */
532  struct strtab_hash_entry *last;
533  /* Whether to precede strings with a two byte length, as in the
534     XCOFF .debug section.  */
535  boolean xcoff;
536};
537
538static struct bfd_hash_entry *strtab_hash_newfunc
539  PARAMS ((struct bfd_hash_entry *, struct bfd_hash_table *, const char *));
540
541/* Routine to create an entry in a strtab.  */
542
543static struct bfd_hash_entry *
544strtab_hash_newfunc (entry, table, string)
545     struct bfd_hash_entry *entry;
546     struct bfd_hash_table *table;
547     const char *string;
548{
549  struct strtab_hash_entry *ret = (struct strtab_hash_entry *) entry;
550
551  /* Allocate the structure if it has not already been allocated by a
552     subclass.  */
553  if (ret == (struct strtab_hash_entry *) NULL)
554    ret = ((struct strtab_hash_entry *)
555	   bfd_hash_allocate (table, sizeof (struct strtab_hash_entry)));
556  if (ret == (struct strtab_hash_entry *) NULL)
557    return NULL;
558
559  /* Call the allocation method of the superclass.  */
560  ret = ((struct strtab_hash_entry *)
561	 bfd_hash_newfunc ((struct bfd_hash_entry *) ret, table, string));
562
563  if (ret)
564    {
565      /* Initialize the local fields.  */
566      ret->index = (bfd_size_type) -1;
567      ret->next = NULL;
568    }
569
570  return (struct bfd_hash_entry *) ret;
571}
572
573/* Look up an entry in an strtab.  */
574
575#define strtab_hash_lookup(t, string, create, copy) \
576  ((struct strtab_hash_entry *) \
577   bfd_hash_lookup (&(t)->table, (string), (create), (copy)))
578
579/* Create a new strtab.  */
580
581struct bfd_strtab_hash *
582_bfd_stringtab_init ()
583{
584  struct bfd_strtab_hash *table;
585  bfd_size_type amt = sizeof (struct bfd_strtab_hash);
586
587  table = (struct bfd_strtab_hash *) bfd_malloc (amt);
588  if (table == NULL)
589    return NULL;
590
591  if (! bfd_hash_table_init (&table->table, strtab_hash_newfunc))
592    {
593      free (table);
594      return NULL;
595    }
596
597  table->size = 0;
598  table->first = NULL;
599  table->last = NULL;
600  table->xcoff = false;
601
602  return table;
603}
604
605/* Create a new strtab in which the strings are output in the format
606   used in the XCOFF .debug section: a two byte length precedes each
607   string.  */
608
609struct bfd_strtab_hash *
610_bfd_xcoff_stringtab_init ()
611{
612  struct bfd_strtab_hash *ret;
613
614  ret = _bfd_stringtab_init ();
615  if (ret != NULL)
616    ret->xcoff = true;
617  return ret;
618}
619
620/* Free a strtab.  */
621
622void
623_bfd_stringtab_free (table)
624     struct bfd_strtab_hash *table;
625{
626  bfd_hash_table_free (&table->table);
627  free (table);
628}
629
630/* Get the index of a string in a strtab, adding it if it is not
631   already present.  If HASH is false, we don't really use the hash
632   table, and we don't eliminate duplicate strings.  */
633
634bfd_size_type
635_bfd_stringtab_add (tab, str, hash, copy)
636     struct bfd_strtab_hash *tab;
637     const char *str;
638     boolean hash;
639     boolean copy;
640{
641  register struct strtab_hash_entry *entry;
642
643  if (hash)
644    {
645      entry = strtab_hash_lookup (tab, str, true, copy);
646      if (entry == NULL)
647	return (bfd_size_type) -1;
648    }
649  else
650    {
651      entry = ((struct strtab_hash_entry *)
652	       bfd_hash_allocate (&tab->table,
653				  sizeof (struct strtab_hash_entry)));
654      if (entry == NULL)
655	return (bfd_size_type) -1;
656      if (! copy)
657	entry->root.string = str;
658      else
659	{
660	  char *n;
661
662	  n = (char *) bfd_hash_allocate (&tab->table, strlen (str) + 1);
663	  if (n == NULL)
664	    return (bfd_size_type) -1;
665	  entry->root.string = n;
666	}
667      entry->index = (bfd_size_type) -1;
668      entry->next = NULL;
669    }
670
671  if (entry->index == (bfd_size_type) -1)
672    {
673      entry->index = tab->size;
674      tab->size += strlen (str) + 1;
675      if (tab->xcoff)
676	{
677	  entry->index += 2;
678	  tab->size += 2;
679	}
680      if (tab->first == NULL)
681	tab->first = entry;
682      else
683	tab->last->next = entry;
684      tab->last = entry;
685    }
686
687  return entry->index;
688}
689
690/* Get the number of bytes in a strtab.  */
691
692bfd_size_type
693_bfd_stringtab_size (tab)
694     struct bfd_strtab_hash *tab;
695{
696  return tab->size;
697}
698
699/* Write out a strtab.  ABFD must already be at the right location in
700   the file.  */
701
702boolean
703_bfd_stringtab_emit (abfd, tab)
704     register bfd *abfd;
705     struct bfd_strtab_hash *tab;
706{
707  register boolean xcoff;
708  register struct strtab_hash_entry *entry;
709
710  xcoff = tab->xcoff;
711
712  for (entry = tab->first; entry != NULL; entry = entry->next)
713    {
714      const char *str;
715      size_t len;
716
717      str = entry->root.string;
718      len = strlen (str) + 1;
719
720      if (xcoff)
721	{
722	  bfd_byte buf[2];
723
724	  /* The output length includes the null byte.  */
725	  bfd_put_16 (abfd, (bfd_vma) len, buf);
726	  if (bfd_bwrite ((PTR) buf, (bfd_size_type) 2, abfd) != 2)
727	    return false;
728	}
729
730      if (bfd_bwrite ((PTR) str, (bfd_size_type) len, abfd) != len)
731	return false;
732    }
733
734  return true;
735}
736