1/* Interface between the opcode library and its callers.
2
3   Copyright (C) 1999-2022 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin Street - Fifth Floor,
18   Boston, MA 02110-1301, USA.
19
20   Written by Cygnus Support, 1993.
21
22   The opcode library (libopcodes.a) provides instruction decoders for
23   a large variety of instruction sets, callable with an identical
24   interface, for making instruction-processing programs more independent
25   of the instruction set being processed.  */
26
27#ifndef DIS_ASM_H
28#define DIS_ASM_H
29
30#ifdef __cplusplus
31extern "C" {
32#endif
33
34#include <stdio.h>
35#include <string.h>
36#include "bfd.h"
37
/* Classification of a decoded instruction, as reported by an
   instruction decoder.  */
enum dis_insn_type
{
  /* Not a valid instruction.  */
  dis_noninsn = 0,
  /* Not a branch instruction.  */
  dis_nonbranch,
  /* Unconditional branch.  */
  dis_branch,
  /* Conditional branch.  */
  dis_condbranch,
  /* Jump to subroutine.  */
  dis_jsr,
  /* Conditional jump to subroutine.  */
  dis_condjsr,
  /* Data reference instruction.  */
  dis_dref,
  /* Two data references in instruction.  */
  dis_dref2
};
49
/* The set of styles a disassembler can attach to each piece of text it
   emits when producing styled output.  */

enum disassembler_style
{
  /* Fallback style.  Covers incidental syntax (commas between operands,
     brackets, and so on) as well as any text for which none of the
     other styles is a good fit.  */
  dis_style_text,

  /* Instruction mnemonics and aliases for mnemonics, i.e. names that
     correspond to real machine instructions.  */
  dis_style_mnemonic,

  /* Mnemonic-like fields that some architectures place among the
     instruction operands.  For example, in the aarch64 instruction
     'add w16, w7, w1, lsl #2' the 'lsl' is extra text describing how
     the instruction should behave, and may be given this style.  */
  dis_style_sub_mnemonic,

  /* Assembler directives, e.g. .byte, which are not real machine
     instructions.  */
  dis_style_assembler_directive,

  /* Register names.  Whether a register prefix such as '$' or '%' is
     printed in this style is up to the target, but within a target the
     choice to include prefixes or not should be kept consistent.  A
     prefix that is not printed in this style should be printed with
     dis_style_text.  */
  dis_style_register,

  /* Constant values used within instructions or directives, except
     absolute addresses and offsets that will be added to an address
     (wherever that address comes from) before use.  Whether a prefix to
     the immediate value, e.g. '$', is printed in this style is up to
     the target, but the choice should be consistent within a target.  */
  dis_style_immediate,

  /* The numerical representation of an absolute address.  Address
     offsets should use the immediate style rather than this one.
     Whether a prefix to the value, e.g. '$', is printed in this style
     is up to the target, but the choice should be consistent within a
     target.  */
  dis_style_address,

  /* Constant values within an instruction or directive that represent
     an offset which will be added to an address before use.  Whether a
     prefix to the value, e.g. '$', is printed in this style is up to
     the target, but the choice should be consistent within a target.  */
  dis_style_address_offset,

  /* The name of a symbol.  This style is reserved for the name; the
     numerical address of a symbol should use dis_style_address.  */
  dis_style_symbol,

  /* The start of a comment that extends to the end of the line.  The
     user of the disassembler may style everything printed after a
     comment start differently, e.g. style it all as a comment whatever
     style was actually requested.  The disassembler itself should not
     adjust the styles it emits for comment content: an address printed
     inside a comment should still be given dis_style_address, leaving
     the decision on how comments look to the disassembler's user.  */
  dis_style_comment_start
};
123
124typedef int (*fprintf_ftype) (void *, const char*, ...) ATTRIBUTE_FPTR_PRINTF_2;
125typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...) ATTRIBUTE_FPTR_PRINTF_3;
126
127/* This struct is passed into the instruction decoding routine,
128   and is passed back out into each callback.  The various fields are used
129   for conveying information from your main routine into your callbacks,
130   for passing information into the instruction decoders (such as the
131   addresses of the callback functions), or for passing information
132   back from the instruction decoders to their callers.
133
134   It must be initialized before it is first passed; this can be done
135   by hand, or using one of the initialization macros below.  */
136
137typedef struct disassemble_info
138{
139  fprintf_ftype fprintf_func;
140  fprintf_styled_ftype fprintf_styled_func;
141  void *stream;
142  void *application_data;
143
144  /* Target description.  We could replace this with a pointer to the bfd,
145     but that would require one.  There currently isn't any such requirement
146     so to avoid introducing one we record these explicitly.  */
147  /* The bfd_flavour.  This can be bfd_target_unknown_flavour.  */
148  enum bfd_flavour flavour;
149  /* The bfd_arch value.  */
150  enum bfd_architecture arch;
151  /* The bfd_mach value.  */
152  unsigned long mach;
153  /* Endianness (for bi-endian cpus).  Mono-endian cpus can ignore this.  */
154  enum bfd_endian endian;
155  /* Endianness of code, for mixed-endian situations such as ARM BE8.  */
156  enum bfd_endian endian_code;
157
158  /* Some targets need information about the current section to accurately
159     display insns.  If this is NULL, the target disassembler function
160     will have to make its best guess.  */
161  asection *section;
162
163  /* An array of pointers to symbols either at the location being disassembled
164     or at the start of the function being disassembled.  The array is sorted
165     so that the first symbol is intended to be the one used.  The others are
166     present for any misc. purposes.  This is not set reliably, but if it is
167     not NULL, it is correct.  */
168  asymbol **symbols;
169  /* Number of symbols in array.  */
170  int num_symbols;
171
172  /* Symbol table provided for targets that want to look at it.  This is
173     used on Arm to find mapping symbols and determine Arm/Thumb code.  */
174  asymbol **symtab;
175  int symtab_pos;
176  int symtab_size;
177
178  /* For use by the disassembler.
179     The top 16 bits are reserved for public use (and are documented here).
180     The bottom 16 bits are for the internal use of the disassembler.  */
181  unsigned long flags;
182  /* Set if the disassembler has determined that there are one or more
183     relocations associated with the instruction being disassembled.  */
184#define INSN_HAS_RELOC	 (1u << 31)
185  /* Set if the user has requested the disassembly of data as well as code.  */
186#define DISASSEMBLE_DATA (1u << 30)
187  /* Set if the user has specifically set the machine type encoded in the
188     mach field of this structure.  */
189#define USER_SPECIFIED_MACHINE_TYPE (1u << 29)
190  /* Set if the user has requested wide output.  */
191#define WIDE_OUTPUT (1u << 28)
192
193  /* Dynamic relocations, if they have been loaded.  */
194  arelent **dynrelbuf;
195  long dynrelcount;
196
197  /* Use internally by the target specific disassembly code.  */
198  void *private_data;
199
200  /* Function used to get bytes to disassemble.  MEMADDR is the
201     address of the stuff to be disassembled, MYADDR is the address to
202     put the bytes in, and LENGTH is the number of bytes to read.
203     INFO is a pointer to this struct.
204     Returns an errno value or 0 for success.  */
205  int (*read_memory_func)
206    (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
207     struct disassemble_info *dinfo);
208
209  /* Function which should be called if we get an error that we can't
210     recover from.  STATUS is the errno value from read_memory_func and
211     MEMADDR is the address that we were trying to read.  INFO is a
212     pointer to this struct.  */
213  void (*memory_error_func)
214    (int status, bfd_vma memaddr, struct disassemble_info *dinfo);
215
216  /* Function called to print ADDR.  */
217  void (*print_address_func)
218    (bfd_vma addr, struct disassemble_info *dinfo);
219
220  /* Function called to determine if there is a symbol at the given ADDR.
221     If there is, the function returns 1, otherwise it returns 0.
222     This is used by ports which support an overlay manager where
223     the overlay number is held in the top part of an address.  In
224     some circumstances we want to include the overlay number in the
225     address, (normally because there is a symbol associated with
226     that address), but sometimes we want to mask out the overlay bits.  */
227  asymbol * (*symbol_at_address_func)
228    (bfd_vma addr, struct disassemble_info *dinfo);
229
230  /* Function called to check if a SYMBOL is can be displayed to the user.
231     This is used by some ports that want to hide special symbols when
232     displaying debugging outout.  */
233  bool (*symbol_is_valid)
234    (asymbol *, struct disassemble_info *dinfo);
235
236  /* These are for buffer_read_memory.  */
237  bfd_byte *buffer;
238  bfd_vma buffer_vma;
239  size_t buffer_length;
240
241  /* This variable may be set by the instruction decoder.  It suggests
242      the number of bytes objdump should display on a single line.  If
243      the instruction decoder sets this, it should always set it to
244      the same value in order to get reasonable looking output.  */
245  int bytes_per_line;
246
247  /* The next two variables control the way objdump displays the raw data.  */
248  /* For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the */
249  /* output will look like this:
250     00:   00000000 00000000
251     with the chunks displayed according to "display_endian". */
252  int bytes_per_chunk;
253  enum bfd_endian display_endian;
254
255  /* Number of octets per incremented target address
256     Normally one, but some DSPs have byte sizes of 16 or 32 bits.  */
257  unsigned int octets_per_byte;
258
259  /* The number of zeroes we want to see at the end of a section before we
260     start skipping them.  */
261  unsigned int skip_zeroes;
262
263  /* The number of zeroes to skip at the end of a section.  If the number
264     of zeroes at the end is between SKIP_ZEROES_AT_END and SKIP_ZEROES,
265     they will be disassembled.  If there are fewer than
266     SKIP_ZEROES_AT_END, they will be skipped.  This is a heuristic
267     attempt to avoid disassembling zeroes inserted by section
268     alignment.  */
269  unsigned int skip_zeroes_at_end;
270
271  /* Whether the disassembler always needs the relocations.  */
272  bool disassembler_needs_relocs;
273
274  /* Results from instruction decoders.  Not all decoders yet support
275     this information.  This info is set each time an instruction is
276     decoded, and is only valid for the last such instruction.
277
278     To determine whether this decoder supports this information, set
279     insn_info_valid to 0, decode an instruction, then check it.  */
280
281  char insn_info_valid;		/* Branch info has been set. */
282  char branch_delay_insns;	/* How many sequential insn's will run before
283				   a branch takes effect.  (0 = normal) */
284  char data_size;		/* Size of data reference in insn, in bytes */
285  enum dis_insn_type insn_type;	/* Type of instruction */
286  bfd_vma target;		/* Target address of branch or dref, if known;
287				   zero if unknown.  */
288  bfd_vma target2;		/* Second target address for dref2 */
289
290  /* Command line options specific to the target disassembler.  */
291  const char *disassembler_options;
292
293  /* If non-zero then try not disassemble beyond this address, even if
294     there are values left in the buffer.  This address is the address
295     of the nearest symbol forwards from the start of the disassembly,
296     and it is assumed that it lies on the boundary between instructions.
297     If an instruction spans this address then this is an error in the
298     file being disassembled.  */
299  bfd_vma stop_vma;
300
301  /* The end range of the current range being disassembled.  This is required
302     in order to notify the disassembler when it's currently handling a
303     different range than it was before.  This prevent unsafe optimizations when
304     disassembling such as the way mapping symbols are found on AArch64.  */
305  bfd_vma stop_offset;
306
307  /* Set to true if the disassembler applied styling to the output,
308     otherwise, set to false.  */
309  bool created_styled_output;
310} disassemble_info;
311
/* Describes one kind of disassembler option argument.  Targets use it
   to tell the generic GDB functions that set and display disassembler
   options which argument values are valid.  */

typedef struct
{
  /* Name of the option argument, for use in descriptions.  */
  const char *name;

  /* NULL-terminated vector of the argument values that are accepted,
     or NULL when any value is accepted.  */
  const char **values;
} disasm_option_arg_t;
325
326/* This struct is used to pass information about valid disassembler
327   options, their descriptions and arguments from the target to the
328   generic GDB functions that set and display them.  Options are
329   defined by tuples of vector entries at each index.  */
330
331typedef struct
332{
333  /* Vector of option names, NULL-terminated.  */
334  const char **name;
335
336  /* Vector of option descriptions or NULL if none to be shown.  */
337  const char **description;
338
339  /* Vector of option argument information pointers or NULL if no
340     option accepts an argument.  NULL entries denote individual
341     options that accept no argument.  */
342  const disasm_option_arg_t **arg;
343} disasm_options_t;
344
345/* This struct is used to pass information about valid disassembler
346   options and arguments from the target to the generic GDB functions
347   that set and display them.  */
348
349typedef struct
350{
351  /* Valid disassembler options.  Individual options that support
352     an argument will refer to entries in the ARGS vector.  */
353  disasm_options_t options;
354
355  /* Vector of acceptable option arguments, NULL-terminated.  This
356     collects all possible option argument choices, some of which
357     may be shared by different options from the OPTIONS member.  */
358  disasm_option_arg_t *args;
359} disasm_options_and_args_t;
360
361/* Standard disassemblers.  Disassemble one instruction at the given
362   target address.  Return number of octets processed.  */
363typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
364
365/* Disassemblers used out side of opcodes library.  */
366extern int print_insn_m32c		(bfd_vma, disassemble_info *);
367extern int print_insn_mep		(bfd_vma, disassemble_info *);
368extern int print_insn_s12z		(bfd_vma, disassemble_info *);
369extern int print_insn_sh		(bfd_vma, disassemble_info *);
370extern int print_insn_sparc		(bfd_vma, disassemble_info *);
371extern int print_insn_rx		(bfd_vma, disassemble_info *);
372extern int print_insn_rl78		(bfd_vma, disassemble_info *);
373extern int print_insn_rl78_g10		(bfd_vma, disassemble_info *);
374extern int print_insn_rl78_g13		(bfd_vma, disassemble_info *);
375extern int print_insn_rl78_g14		(bfd_vma, disassemble_info *);
376
377extern disassembler_ftype arc_get_disassembler (bfd *);
378extern disassembler_ftype cris_get_disassembler (bfd *);
379
380extern void print_aarch64_disassembler_options (FILE *);
381extern void print_i386_disassembler_options (FILE *);
382extern void print_mips_disassembler_options (FILE *);
383extern void print_nfp_disassembler_options (FILE *);
384extern void print_ppc_disassembler_options (FILE *);
385extern void print_riscv_disassembler_options (FILE *);
386extern void print_arm_disassembler_options (FILE *);
387extern void print_arc_disassembler_options (FILE *);
388extern void print_s390_disassembler_options (FILE *);
389extern void print_wasm32_disassembler_options (FILE *);
390extern void print_loongarch_disassembler_options (FILE *);
391extern bool aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
392extern bool arm_symbol_is_valid (asymbol *, struct disassemble_info *);
393extern bool csky_symbol_is_valid (asymbol *, struct disassemble_info *);
394extern bool riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
395extern void disassemble_init_powerpc (struct disassemble_info *);
396extern void disassemble_init_s390 (struct disassemble_info *);
397extern void disassemble_init_wasm32 (struct disassemble_info *);
398extern void disassemble_init_nds32 (struct disassemble_info *);
399extern const disasm_options_and_args_t *disassembler_options_arc (void);
400extern const disasm_options_and_args_t *disassembler_options_arm (void);
401extern const disasm_options_and_args_t *disassembler_options_mips (void);
402extern const disasm_options_and_args_t *disassembler_options_powerpc (void);
403extern const disasm_options_and_args_t *disassembler_options_riscv (void);
404extern const disasm_options_and_args_t *disassembler_options_s390 (void);
405
406/* Fetch the disassembler for a given architecture ARC, endianess (big
407   endian if BIG is true), bfd_mach value MACH, and ABFD, if that support
408   is available.  ABFD may be NULL.  */
409extern disassembler_ftype disassembler (enum bfd_architecture arc,
410					bool big, unsigned long mach,
411					bfd *abfd);
412
413/* Amend the disassemble_info structure as necessary for the target architecture.
414   Should only be called after initialising the info->arch field.  */
415extern void disassemble_init_for_target (struct disassemble_info *);
416
417/* Tidy any memory allocated by targets, such as info->private_data.  */
418extern void disassemble_free_target (struct disassemble_info *);
419
420/* Set the basic disassembler print functions.  */
421extern void disassemble_set_printf (struct disassemble_info *, void *,
422				    fprintf_ftype, fprintf_styled_ftype);
423
424/* Document any target specific options available from the disassembler.  */
425extern void disassembler_usage (FILE *);
426
427/* Remove whitespace and consecutive commas.  */
428extern char *remove_whitespace_and_extra_commas (char *);
429
430/* Like STRCMP, but treat ',' the same as '\0' so that we match
431   strings like "foobar" against "foobar,xxyyzz,...".  */
432extern int disassembler_options_cmp (const char *, const char *);
433
/* A helper function for FOR_EACH_DISASSEMBLER_OPTION.

   OPTIONS is a comma separated list of options.  Return a pointer to
   the character following the first ',' in OPTIONS, i.e. the start of
   the next option, or NULL if OPTIONS contains no ','.  The result
   points into OPTIONS itself; nothing is copied or modified.

   A NULL OPTIONS is treated as an exhausted list and yields NULL, so
   the helper is also safe to call directly rather than only through
   the iteration macro.  */
static inline const char *
next_disassembler_option (const char *options)
{
  const char *opt;

  /* strchr must not be handed a null pointer.  */
  if (options == NULL)
    return NULL;

  opt = strchr (options, ',');
  if (opt != NULL)
    opt++;
  return opt;
}
443
/* Loop header that visits each comma separated option in OPTIONS.  On
   every pass OPT points at the start of the current option within
   OPTIONS itself, so the option text is ended by either ',' or the end
   of the string.  The loop body does not run when OPTIONS is NULL.  */
#define FOR_EACH_DISASSEMBLER_OPTION(OPT, OPTIONS) \
  for ((OPT) = (OPTIONS); (OPT) != NULL; \
       (OPT) = next_disassembler_option (OPT))
449
450
451/* This block of definitions is for particular callers who read instructions
452   into a buffer before calling the instruction decoder.  */
453
454/* Here is a function which callers may wish to use for read_memory_func.
455   It gets bytes from a buffer.  */
456extern int buffer_read_memory
457  (bfd_vma, bfd_byte *, unsigned int, struct disassemble_info *);
458
459/* This function goes with buffer_read_memory.
460   It prints a message using info->fprintf_func and info->stream.  */
461extern void perror_memory (int, bfd_vma, struct disassemble_info *);
462
463
464/* Just print the address in hex.  This is included for completeness even
465   though both GDB and objdump provide their own (to print symbolic
466   addresses).  */
467extern void generic_print_address
468  (bfd_vma, struct disassemble_info *);
469
470/* Always NULL.  */
471extern asymbol *generic_symbol_at_address
472  (bfd_vma, struct disassemble_info *);
473
474/* Always true.  */
475extern bool generic_symbol_is_valid
476  (asymbol *, struct disassemble_info *);
477
478/* Method to initialize a disassemble_info struct.  This should be
479   called by all applications creating such a struct.  */
480extern void init_disassemble_info (struct disassemble_info *dinfo, void *stream,
481				   fprintf_ftype fprintf_func,
482				   fprintf_styled_ftype fprintf_styled_func);
483
484/* For compatibility with existing code.  */
485#define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC, FPRINTF_STYLED_FUNC)  \
486  init_disassemble_info (&(INFO), (STREAM), (fprintf_ftype) (FPRINTF_FUNC), \
487			 (fprintf_styled_ftype) (FPRINTF_STYLED_FUNC))
488
489#ifdef __cplusplus
490}
491#endif
492
493#endif /* ! defined (DIS_ASM_H) */
494