/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* GDB symbol table format definitions.
   Copyright (C) 1986 Free Software Foundation, Inc.

GDB is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY.  No author or distributor accepts responsibility to anyone
for the consequences of using it or for whether it serves any
particular purpose or works at all, unless he says so in writing.
Refer to the GDB General Public License for full details.

Everyone is granted permission to copy, modify and redistribute GDB,
but only under the conditions described in the GDB General Public
License.  A copy of this license is supposed to have been given to you
along with GDB so you can know your rights and responsibilities.  It
should be in a file named COPYING.  Among other things, the copyright
notice and this notice must be preserved on all copies.

In other words, go ahead and share GDB, but don't try to stop
anyone else from sharing it farther.  Help stamp out software hoarding!
*/

/* Format of GDB symbol table data.
   There is one symbol segment for each source file or
   independent compilation.  These segments are simply concatenated
   to form the GDB symbol table.  A zero word where the beginning
   of a segment is expected indicates there are no more segments.

Format of a symbol segment:

   The symbol segment begins with a word containing 1
   if it is in the format described here.  Other formats may
   be designed, with other code numbers.

   The segment contains many objects which point at each other.
   The pointers are offsets in bytes from the beginning of the segment.
   Thus, each segment can be loaded into core and its pointers relocated
   to make valid in-core pointers.

   All the data objects in the segment can be found indirectly from
   one of them, the root object, of type `struct symbol_root'.
   It appears at the beginning of the segment.

   The total size of the segment, in bytes, appears as the `length'
   field of this object.  This size includes the size of the
   root object.

   All the object data types are defined here to contain pointer types
   appropriate for in-core use on a relocated symbol segment.
   Casts to and from type int are required for working with
   unrelocated symbol segments such as are found in the file.

   The ldsymoff word is filled in by the loader to contain
   the offset (in bytes) within the ld symbol table
   of the first nonglobal symbol from this compilation.
   This makes it possible to match those symbols
   (which contain line number information) reliably with
   the segment they go with.

   Core addresses within the program that appear in the symbol segment
   are not relocated by the loader.  They are inserted by the assembler
   and apply to addresses as output by the assembler, so GDB must
   relocate them when it loads the symbol segment.  It gets the information
   on how to relocate from the textrel, datarel, bssrel, databeg and bssbeg
   words of the root object.

   The words textrel, datarel and bssrel
   are filled in by ld with the amounts to relocate within-the-file
   text, data and bss addresses by; databeg and bssbeg can be
   used to tell which kind of relocation an address needs.  */

/* Code identifying the source language of a compilation, as stored in the
   `language' field of a symbol root.  C is the only code defined here.  */
enum language
{
  language_c
};

/*
 * All symbol roots must have as their first two fields format and length
 * fields.  The total length of the symbol root must be a multiple of
 * sizeof(uint32_t) and any padding must be zeroed.
 */
struct symbol_root_header
{
  int format;	/* type of symbol segment (one of the *_ROOT_FORMAT codes) */
  int length;	/* # bytes in this symbol segment, rounded to sizeof(uint32_t) */
};

/*
 * Constants for symbol root format fields.  Each code is the value found in
 * the `format' word of the root structure named beside it.
 */
#define SYMBOL_ROOT_FORMAT		1	/* struct symbol_root */
#define INDIRECT_ROOT_FORMAT		1002	/* struct indirect_root */
#define COMMON_ROOT_FORMAT		1003	/* struct common_root */
#define SHLIB_ROOT_FORMAT		1004	/* struct shlib_root */
#define ALIAS_ROOT_FORMAT		1005	/* struct alias_root */
#define MACH_ROOT_FORMAT		2001	/* struct mach_root */
#define MACH_INDIRECT_ROOT_FORMAT	2002	/* struct mach_indirect_root */
#define MACH_SHLIB_ROOT_FORMAT		2004	/* struct mach_shlib_root */


118struct symbol_root
119{
120  int format;			/* SYMBOL_ROOT_FORMAT */
121  int length;			/* # bytes in this symbol segment */
122  int ldsymoff;			/* Offset in ld symtab of this file's syms */
123  int textrel;			/* Relocation for text addresses */
124  int datarel;			/* Relocation for data addresses */
125  int bssrel;			/* Relocation for bss addresses */
126  char *filename;		/* Name of main source file compiled */
127  char *filedir;		/* Name of directory it was reached from */
128  struct blockvector *blockvector; /* Vector of all symbol-naming blocks */
129  struct typevector *typevector; /* Vector of all data types */
130  enum language language;	/* Code identifying the language used */
131  char *version;		/* Version info.  Not fully specified */
132  char *compilation;		/* Compilation info.  Not fully specified */
133  int databeg;			/* Address within the file of data start */
134  int bssbeg;			/* Address within the file of bss start */
135  struct sourcevector *sourcevector; /* Vector of line-number info */
136};
137
138struct mach_root
139{
140  int format;			/* MACH_ROOT_FORMAT */
141  int length;			/* # bytes in this symbol segment */
142  int ldsymoff;			/* Offset in ld symtab of this file's syms */
143  struct loadmap *loadmap;	/* load map of the relocatable object */
144  char *filename;		/* Name of main source file compiled */
145  char *filedir;		/* Name of directory it was reached from */
146  struct blockvector *blockvector; /* Vector of all symbol-naming blocks */
147  struct typevector *typevector; /* Vector of all data types */
148  enum language language;	/* Code identifying the language used */
149  char *version;		/* Version info.  Not fully specified */
150  char *compilation;		/* Compilation info.  Not fully specified */
151  struct sourcevector *sourcevector; /* Vector of line-number info */
152};
153
/*
 * Indirect symbol root format.  Written by ld when -g is used (the default).
 * This is for lazy evaluation of the -gg symbol segments.
 */
struct indirect_root {
  int format;		/* INDIRECT_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int ldsymoff;		/* Offset in ld symtab of this file's syms */
  int textrel;		/* Relocation for text addresses */
  int datarel;		/* Relocation for data addresses */
  int bssrel;		/* Relocation for bss addresses */
  int textsize;		/* text size */
  int datasize;		/* data size */
  int bsssize;		/* bss size */
  int mtime;		/* last modified time, as returned by stat(2) */
  int fileoffset;	/* Offset in file that contains symbol_root */
  char filename[1];	/* variable length file name, zero padded; declared
			   with one element as a placeholder */
};

/*
 * Mach indirect symbol root format.  Written by ld when -g is used (the
 * default).  This is for lazy evaluation of the -gg symbol segments.
 */
struct mach_indirect_root {
  int format;		/* MACH_INDIRECT_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int ldsymoff;		/* Offset in ld symtab of this file's syms */
  struct loadmap *loadmap; /* load map of the relocatable object */
  int mtime;		/* last modified time, as returned by stat(2) */
  int fileoffset;	/* Offset in relocatable file that contains the
			   mach_root */
  char filename[1];	/* variable length file name, zero padded; declared
			   with one element as a placeholder */
};

/*
 * common symbol root format.  For each common symbol that the link editor
 * defines the storage for that symbol name is recorded in here.
 */
struct common_root {
  int format;		/* COMMON_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int nsyms;		/* the number of strings in the data[] field for the
			   common symbols names of this file */
  char data[1];
  /* Data looks like the following:
	- Null terminated string for the filename.
	- Null terminated strings for syms.
	...
	- zero padded to round to sizeof(uint32_t)
   */
};

/*
 * shlib_root: Written by ld for target shared library output.  This has two
 * fields for each of the data segment fields. The data segments of .o files
 * that go into target shared libraries have all their static data first in
 * the data segment followed by all the global data.  When it is loaded into
 * a target shared library the global data from all the .o files is placed
 * first in the data segment then all of the static data.  So this information
 * is reflected in the {global,static}datarel and the {global,static}databeg
 * fields.
 *
 * After one of these has been written for each object in the shared library
 * then the symbol root from each object is written into the shared library.
 */
struct shlib_root {
  int format;		/* SHLIB_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int ldsymoff;		/* Offset in ld symtab of this file's syms */
  int textrel;		/* Relocation for text addresses */
  int globaldatarel;	/* Relocation for global data addresses */
  int staticdatarel;	/* Relocation for static data addresses */
  int globaldatabeg;	/* Address of the global data start */
  int staticdatabeg;	/* Address of the static data start */
  int globaldatasize;	/* global data size */
  int staticdatasize;	/* static data size */
  int symreloffset;	/* relative offset, from the first SYMBOL_ROOT_FORMAT
			   of the symbol root for this file */
  char filename[1];	/* variable length file name, zero padded */
};

/*
 * Shared library root whose format word is MACH_SHLIB_ROOT_FORMAT.  It
 * records ldsymoff, symreloffset and filename as `struct shlib_root' does,
 * but carries a load map pointer in place of the relocation, begin and size
 * words.
 */
struct mach_shlib_root {
  int format;		/* MACH_SHLIB_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int ldsymoff;		/* Offset in ld symtab of this file's syms */
  struct loadmap *loadmap; /* load map of the relocatable object */
  int symreloffset;	/* relative offset, from the first SYMBOL_ROOT_FORMAT
			   of the symbol root for this file */
  char filename[1];	/* variable length file name, zero padded */
};

/*
 * This format is used when the link-editor alias option -a original:alias
 * is used when producing an output file.  This option changes symbols in
 * the .o file from 'original' to 'alias' in the a.out file.
 */
struct alias_root {
  int format;		/* ALIAS_ROOT_FORMAT */
  int length;		/* length of this struct, rounded to sizeof(uint32_t) */
  int naliases;		/* number of pairs of aliased symbols */
  char data[1];
  /* Data looks like the following:
	- Pairs of:
	    - Null terminated string for the original symbol
	    - Null terminated string for the aliased symbol
	- zero padded to round to sizeof(uint32_t)
   */
};

/*
 * The load map describes where the parts of the relocatable object have been
 * loaded in the executable.  The entire address space of the relocatable
 * is to be covered by all the map entries.  There may be multiple map entries
 * for a single section or one map entry for multiple sections.  This allows
 * the link editor to scatter load a section based on information that improves
 * performance by increasing the locality of reference.
 */
struct loadmap
{
  /* Number of maps in the list.  */
  int nmaps;
  /* The maps themselves.  Declared with one element; nmaps gives the
     actual count.  */
  struct map *map[1];
};
/* One entry of a load map: a part of the relocatable object and the address
   at which the loader placed it.  */
struct map
{
  /* The starting address in the relocatable object of part of the
     object file. */
  int reladdr;
  /* The size of that part of the object file. */
  int size;
  /* The address the loader loaded this part of the object file at */
  int ldaddr;
};


/* All data types of symbols in the compiled program
   are represented by `struct type' objects.
   All of these objects are pointed to by the typevector.
   The type vector may have empty slots that contain zero.  */

struct typevector
{
  int length;			/* Number of types described */
  struct type *type[1];		/* The types; a slot may be zero */
};

/* Different kinds of data types are distinguished by the `code' field.  */

enum type_code
{
  TYPE_CODE_UNDEF,		/* Not used; catches errors */
  TYPE_CODE_PTR,		/* Pointer type */
  TYPE_CODE_ARRAY,		/* Array type, lower bound zero */
  TYPE_CODE_STRUCT,		/* C struct or Pascal record */
  TYPE_CODE_UNION,		/* C union or Pascal variant part */
  TYPE_CODE_ENUM,		/* Enumeration type */
  TYPE_CODE_FUNC,		/* Function type */
  TYPE_CODE_INT,		/* Integer type */
  TYPE_CODE_FLT,		/* Floating type */
  TYPE_CODE_VOID,		/* Void type (values zero length) */
  TYPE_CODE_SET,		/* Pascal sets */
  TYPE_CODE_RANGE,		/* Range (integers within spec'd bounds) */
  TYPE_CODE_PASCAL_ARRAY	/* Array with explicit type of index */
};

/* This appears in a type's flags word for an unsigned integer type.  */
#define TYPE_FLAG_UNSIGNED 1

/* Other flag bits are used with GDB.  */

323struct type
324{
325  /* Code for kind of type */
326  enum type_code code;
327  /* Name of this type, or zero if none.
328     This is used for printing only.
329     Type names specified as input are defined by symbols.  */
330  char *name;
331  /* Length in bytes of storage for a value of this type */
332  int length;
333  /* For a pointer type, describes the type of object pointed to.
334     For an array type, describes the type of the elements.
335     For a function type, describes the type of the value.
336     Unused otherwise.  */
337  struct type *target_type;
338  /* Type that is a pointer to this type.
339     Zero if no such pointer-to type is known yet.
340     The debugger may add the address of such a type
341     if it has to construct one later.  */
342  struct type *pointer_type;
343  /* Type that is a function returning this type.
344     Zero if no such function type is known here.
345     The debugger may add the address of such a type
346     if it has to construct one later.  */
347  struct type *function_type;
348  /* Flags about this type.  */
349  short flags;
350  /* Number of fields described for this type */
351  short nfields;
352  /* For structure and union types, a description of each field.
353     For set and pascal array types, there is one "field",
354     whose type is the domain type of the set or array.
355     For range types, there are two "fields",
356     the minimum and maximum values (both inclusive).
357     For enum types, each possible value is described by one "field".
358     For range types, there are two "fields", that record constant values
359     (inclusive) for the minimum and maximum.
360
361     Using a pointer to a separate array of fields
362     allows all types to have the same size, which is useful
363     because we can allocate the space for a type before
364     we know what to put in it.  */
365  struct field
366    {
367      /* Position of this field, counting in bits from start of
368	 containing structure.  For a function type, this is the
369	 position in the argument list of this argument.
370	 For a range bound or enum value, this is the value itself.  */
371      int bitpos;
372      /* Size of this field, in bits, or zero if not packed.
373	 For an unpacked field, the field's type's length
374	 says how many bytes the field occupies.  */
375      int bitsize;
376      /* In a struct or enum type, type of this field.
377	 In a function type, type of this argument.
378	 In an array type, the domain-type of the array.  */
379      struct type *type;
380      /* Name of field, value or argument.
381	 Zero for range bounds and array domains.  */
382      char *name;
383    } *fields;
384};
385
/* All of the name-scope contours of the program
   are represented by `struct block' objects.
   All of these objects are pointed to by the blockvector.

   Each block represents one name scope.
   Each lexical context has its own block.

   The first two blocks in the blockvector are special.
   The first one contains all the symbols defined in this compilation
   whose scope is the entire program linked together.
   The second one contains all the symbols whose scope is the
   entire compilation excluding other separate compilations.
   In C, these correspond to global symbols and static symbols.

   Each block records a range of core addresses for the code that
   is in the scope of the block.  The first two special blocks
   give, for the range of code, the entire range of code produced
   by the compilation that the symbol segment belongs to.

   The blocks appear in the blockvector
   in order of increasing starting-address,
   and, within that, in order of decreasing ending-address.

   This implies that within the body of one function
   the blocks appear in the order of a depth-first tree walk.  */

struct blockvector
{
  /* Number of blocks in the list.  */
  int nblocks;
  /* The blocks themselves.  Declared with one element; nblocks gives the
     actual count.  */
  struct block *block[1];
};

/* One name scope: the range of code it covers, the function it is the body
   of (if any), its enclosing block and the symbols local to it.  */
struct block
{
  /* Addresses in the executable code that are in this block.
     Note: in an unrelocated symbol segment in a file,
     these are always zero.  They can be filled in from the
     N_LBRAC and N_RBRAC symbols in the loader symbol table.  */
  int startaddr;
  int endaddr;
  /* The symbol that names this block,
     if the block is the body of a function;
     otherwise, zero.
     Note: In an unrelocated symbol segment in an object file,
     this field may be zero even when the block has a name.
     That is because the block is output before the name
     (since the name resides in a higher block).
     Since the symbol does point to the block (as its value),
     it is possible to find the block and set its name properly.  */
  struct symbol *function;
  /* The `struct block' for the containing block, or 0 if none.  */
  /* Note that in an unrelocated symbol segment in an object file
     this pointer may be zero when the correct value should be
     the second special block (for symbols whose scope is one compilation).
     This is because the compiler outputs the special blocks at the
     very end, after the other blocks.   */
  struct block *superblock;
  /* Number of local symbols.  */
  int nsyms;
  /* The symbols.  Declared with one element; nsyms gives the actual
     count.  */
  struct symbol *sym[1];
};

/* Represent one symbol name; a variable, constant, function or typedef.  */

/* Different name spaces for symbols.  Looking up a symbol specifies
   a namespace and ignores symbol definitions in other name spaces.

   VAR_NAMESPACE is the usual namespace.
   In C, this contains variables, function names, typedef names
   and enum type values.

   STRUCT_NAMESPACE is used in C to hold struct, union and enum type names.
   Thus, if `struct foo' is used in a C program,
   it produces a symbol named `foo' in the STRUCT_NAMESPACE.

   LABEL_NAMESPACE may be used for names of labels (for gotos);
   currently it is not used and labels are not recorded at all.  */

/* For a non-global symbol allocated statically,
   the correct core address cannot be determined by the compiler.
   The compiler puts an index number into the symbol's value field.
   This index number can be matched with the "desc" field of
   an entry in the loader symbol table.  */

enum namespace
{
  UNDEF_NAMESPACE,
  VAR_NAMESPACE,
  STRUCT_NAMESPACE,
  LABEL_NAMESPACE
};

/* An address-class says where to find the value of the symbol in core.  */

enum address_class
{
  LOC_UNDEF,		/* Not used; catches errors */
  LOC_CONST,		/* Value is constant int */
  LOC_STATIC,		/* Value is at fixed address */
  LOC_REGISTER,		/* Value is in register */
  LOC_ARG,		/* Value is at spec'd position in arglist */
  LOC_LOCAL,		/* Value is at spec'd pos in stack frame */
  LOC_TYPEDEF,		/* Value not used; definition in SYMBOL_TYPE
			   Symbols in the namespace STRUCT_NAMESPACE
			   all have this class.  */
  LOC_LABEL,		/* Value is address in the code */
  LOC_BLOCK,		/* Value is address of a `struct block'.
			   Function names have this class.  */
  LOC_EXTERNAL,		/* Value is at address not in this compilation.
			   This is used for .comm symbols
			   and for extern symbols within functions.
			   Inside GDB, this is changed to LOC_STATIC once the
			   real address is obtained from a loader symbol.  */
  LOC_CONST_BYTES	/* Value is a constant byte-sequence.   */
};

501struct symbol
502{
503  /* Symbol name */
504  char *name;
505  /* Name space code.  */
506  enum namespace namespace;
507  /* Address class */
508  enum address_class class;
509  /* Data type of value */
510  struct type *type;
511  /* constant value, or address if static, or register number,
512     or offset in arguments, or offset in stack frame.  */
513  union
514    {
515      long value;
516      struct block *block;      /* for LOC_BLOCK */
517      char *bytes;		/* for LOC_CONST_BYTES */
518    }
519  value;
520};
521
/* Source-file information.
   This describes the relation between source files and line numbers
   and addresses in the program text.  */

struct sourcevector
{
  int length;			/* Number of source files described */
  struct source *source[1];	/* Descriptions of the files */
};

/* Each item is either minus a line number, or a program counter.
   If it represents a line number, that is the line described by the next
   program counter value.  If it is positive, it is the program
   counter at which the code for the next line starts.

   Consecutive lines can be recorded by program counter entries
   with no line number entries between them.  Line number entries
   are used when there are lines to skip with no code on them.
   This is to make the table shorter.  */

struct linetable
{
  int nitems;		/* Number of entries in item[] */
  int item[1];		/* The entries, encoded as described above */
};

548/* All the information on one source file.  */
549
550struct source
551{
552  char *name;			/* Name of file */
553  struct linetable contents;
554};
555