1/* Generic BFD support for file formats.
2   Copyright (C) 1990-2022 Free Software Foundation, Inc.
3   Written by Cygnus Support.
4
5   This file is part of BFD, the Binary File Descriptor library.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20   MA 02110-1301, USA.  */
21
22
23/*
24SECTION
25	File formats
26
	A format is the BFD notion of the high-level type of a file's
	contents.  The formats supported by BFD are:
29
30	o <<bfd_object>>
31
32	The BFD may contain data, symbols, relocations and debug info.
33
34	o <<bfd_archive>>
35
36	The BFD contains other BFDs and an optional index.
37
38	o <<bfd_core>>
39
40	The BFD contains the result of an executable core dump.
41
42SUBSECTION
43	File format functions
44*/
45
46#include "sysdep.h"
47#include "bfd.h"
48#include "libbfd.h"
49
50/* IMPORT from targets.c.  */
51extern const size_t _bfd_target_vector_entries;
52
53/*
54FUNCTION
55	bfd_check_format
56
57SYNOPSIS
58	bool bfd_check_format (bfd *abfd, bfd_format format);
59
60DESCRIPTION
61	Verify if the file attached to the BFD @var{abfd} is compatible
62	with the format @var{format} (i.e., one of <<bfd_object>>,
63	<<bfd_archive>> or <<bfd_core>>).
64
65	If the BFD has been set to a specific target before the
66	call, only the named target and format combination is
67	checked. If the target has not been set, or has been set to
	<<default>>, then all the known target backends are
69	interrogated to determine a match.  If the default target
70	matches, it is used.  If not, exactly one target must recognize
71	the file, or an error results.
72
73	The function returns <<TRUE>> on success, otherwise <<FALSE>>
74	with one of the following error codes:
75
76	o <<bfd_error_invalid_operation>> -
77	if <<format>> is not one of <<bfd_object>>, <<bfd_archive>> or
78	<<bfd_core>>.
79
80	o <<bfd_error_system_call>> -
	if an error occurred during a read - even some file mismatches
82	can cause bfd_error_system_calls.
83
	o <<bfd_error_file_not_recognized>> -
85	none of the backends recognised the file format.
86
87	o <<bfd_error_file_ambiguously_recognized>> -
88	more than one backend recognised the file format.
89*/
90
91bool
92bfd_check_format (bfd *abfd, bfd_format format)
93{
94  return bfd_check_format_matches (abfd, format, NULL);
95}
96
97struct bfd_preserve
98{
99  void *marker;
100  void *tdata;
101  flagword flags;
102  const struct bfd_arch_info *arch_info;
103  struct bfd_section *sections;
104  struct bfd_section *section_last;
105  unsigned int section_count;
106  unsigned int section_id;
107  struct bfd_hash_table section_htab;
108  const struct bfd_build_id *build_id;
109  bfd_cleanup cleanup;
110};
111
112/* When testing an object for compatibility with a particular target
113   back-end, the back-end object_p function needs to set up certain
114   fields in the bfd on successfully recognizing the object.  This
115   typically happens in a piecemeal fashion, with failures possible at
116   many points.  On failure, the bfd is supposed to be restored to its
117   initial state, which is virtually impossible.  However, restoring a
118   subset of the bfd state works in practice.  This function stores
119   the subset.  */
120
121static bool
122bfd_preserve_save (bfd *abfd, struct bfd_preserve *preserve,
123		   bfd_cleanup cleanup)
124{
125  preserve->tdata = abfd->tdata.any;
126  preserve->arch_info = abfd->arch_info;
127  preserve->flags = abfd->flags;
128  preserve->sections = abfd->sections;
129  preserve->section_last = abfd->section_last;
130  preserve->section_count = abfd->section_count;
131  preserve->section_id = _bfd_section_id;
132  preserve->section_htab = abfd->section_htab;
133  preserve->marker = bfd_alloc (abfd, 1);
134  preserve->build_id = abfd->build_id;
135  preserve->cleanup = cleanup;
136  if (preserve->marker == NULL)
137    return false;
138
139  return bfd_hash_table_init (&abfd->section_htab, bfd_section_hash_newfunc,
140			      sizeof (struct section_hash_entry));
141}
142
143/* Clear out a subset of BFD state.  */
144
145static void
146bfd_reinit (bfd *abfd, unsigned int section_id, bfd_cleanup cleanup)
147{
148  _bfd_section_id = section_id;
149  if (cleanup)
150    cleanup (abfd);
151  abfd->tdata.any = NULL;
152  abfd->arch_info = &bfd_default_arch_struct;
153  abfd->flags &= BFD_FLAGS_SAVED;
154  abfd->build_id = NULL;
155  bfd_section_list_clear (abfd);
156}
157
158/* Restores bfd state saved by bfd_preserve_save.  */
159
160static bfd_cleanup
161bfd_preserve_restore (bfd *abfd, struct bfd_preserve *preserve)
162{
163  bfd_hash_table_free (&abfd->section_htab);
164
165  abfd->tdata.any = preserve->tdata;
166  abfd->arch_info = preserve->arch_info;
167  abfd->flags = preserve->flags;
168  abfd->section_htab = preserve->section_htab;
169  abfd->sections = preserve->sections;
170  abfd->section_last = preserve->section_last;
171  abfd->section_count = preserve->section_count;
172  _bfd_section_id = preserve->section_id;
173  abfd->build_id = preserve->build_id;
174
175  /* bfd_release frees all memory more recently bfd_alloc'd than
176     its arg, as well as its arg.  */
177  bfd_release (abfd, preserve->marker);
178  preserve->marker = NULL;
179  return preserve->cleanup;
180}
181
182/* Called when the bfd state saved by bfd_preserve_save is no longer
183   needed.  */
184
185static void
186bfd_preserve_finish (bfd *abfd ATTRIBUTE_UNUSED, struct bfd_preserve *preserve)
187{
188  if (preserve->cleanup)
189    {
190      /* Run the cleanup, assuming that all it will need is the
191	 tdata at the time the cleanup was returned.  */
192      void *tdata = abfd->tdata.any;
193      abfd->tdata.any = preserve->tdata;
194      preserve->cleanup (abfd);
195      abfd->tdata.any = tdata;
196    }
197  /* It would be nice to be able to free more memory here, eg. old
198     tdata, but that's not possible since these blocks are sitting
199     inside bfd_alloc'd memory.  The section hash is on a separate
200     objalloc.  */
201  bfd_hash_table_free (&preserve->section_htab);
202  preserve->marker = NULL;
203}
204
205/*
206FUNCTION
207	bfd_check_format_matches
208
209SYNOPSIS
210	bool bfd_check_format_matches
211	  (bfd *abfd, bfd_format format, char ***matching);
212
213DESCRIPTION
214	Like <<bfd_check_format>>, except when it returns FALSE with
215	<<bfd_errno>> set to <<bfd_error_file_ambiguously_recognized>>.  In that
216	case, if @var{matching} is not NULL, it will be filled in with
217	a NULL-terminated list of the names of the formats that matched,
218	allocated with <<malloc>>.
219	Then the user may choose a format and try again.
220
221	When done with the list that @var{matching} points to, the caller
222	should free it.
223*/
224
225bool
226bfd_check_format_matches (bfd *abfd, bfd_format format, char ***matching)
227{
228  extern const bfd_target binary_vec;
229#if BFD_SUPPORTS_PLUGINS
230  extern const bfd_target plugin_vec;
231#endif
232  const bfd_target * const *target;
233  const bfd_target **matching_vector = NULL;
234  const bfd_target *save_targ, *right_targ, *ar_right_targ, *match_targ;
235  int match_count, best_count, best_match;
236  int ar_match_index;
237  unsigned int initial_section_id = _bfd_section_id;
238  struct bfd_preserve preserve, preserve_match;
239  bfd_cleanup cleanup = NULL;
240
241  if (matching != NULL)
242    *matching = NULL;
243
244  if (!bfd_read_p (abfd)
245      || (unsigned int) abfd->format >= (unsigned int) bfd_type_end)
246    {
247      bfd_set_error (bfd_error_invalid_operation);
248      return false;
249    }
250
251  if (abfd->format != bfd_unknown)
252    return abfd->format == format;
253
254  if (matching != NULL || *bfd_associated_vector != NULL)
255    {
256      size_t amt;
257
258      amt = sizeof (*matching_vector) * 2 * _bfd_target_vector_entries;
259      matching_vector = (const bfd_target **) bfd_malloc (amt);
260      if (!matching_vector)
261	return false;
262    }
263
264  /* Presume the answer is yes.  */
265  abfd->format = format;
266  save_targ = abfd->xvec;
267
268  preserve_match.marker = NULL;
269  if (!bfd_preserve_save (abfd, &preserve, NULL))
270    goto err_ret;
271
272  /* If the target type was explicitly specified, just check that target.  */
273  if (!abfd->target_defaulted)
274    {
275      if (bfd_seek (abfd, (file_ptr) 0, SEEK_SET) != 0)	/* rewind! */
276	goto err_ret;
277
278      cleanup = BFD_SEND_FMT (abfd, _bfd_check_format, (abfd));
279
280      if (cleanup)
281	goto ok_ret;
282
283      /* For a long time the code has dropped through to check all
284	 targets if the specified target was wrong.  I don't know why,
285	 and I'm reluctant to change it.  However, in the case of an
286	 archive, it can cause problems.  If the specified target does
287	 not permit archives (e.g., the binary target), then we should
288	 not allow some other target to recognize it as an archive, but
289	 should instead allow the specified target to recognize it as an
290	 object.  When I first made this change, it broke the PE target,
291	 because the specified pei-i386 target did not recognize the
292	 actual pe-i386 archive.  Since there may be other problems of
293	 this sort, I changed this test to check only for the binary
294	 target.  */
295      if (format == bfd_archive && save_targ == &binary_vec)
296	goto err_unrecog;
297    }
298
299  /* Since the target type was defaulted, check them all in the hope
300     that one will be uniquely recognized.  */
301  right_targ = NULL;
302  ar_right_targ = NULL;
303  match_targ = NULL;
304  best_match = 256;
305  best_count = 0;
306  match_count = 0;
307  ar_match_index = _bfd_target_vector_entries;
308
309  for (target = bfd_target_vector; *target != NULL; target++)
310    {
311      void **high_water;
312
313      /* The binary target matches anything, so don't return it when
314	 searching.  Don't match the plugin target if we have another
315	 alternative since we want to properly set the input format
316	 before allowing a plugin to claim the file.  Also, don't
317	 check the default target twice.  */
318      if (*target == &binary_vec
319#if BFD_SUPPORTS_PLUGINS
320	  || (match_count != 0 && *target == &plugin_vec)
321#endif
322	  || (!abfd->target_defaulted && *target == save_targ))
323	continue;
324
325      /* If we already tried a match, the bfd is modified and may
326	 have sections attached, which will confuse the next
327	 _bfd_check_format call.  */
328      bfd_reinit (abfd, initial_section_id, cleanup);
329      /* Free bfd_alloc memory too.  If we have matched and preserved
330	 a target then the high water mark is that much higher.  */
331      if (preserve_match.marker)
332	high_water = &preserve_match.marker;
333      else
334	high_water = &preserve.marker;
335      bfd_release (abfd, *high_water);
336      *high_water = bfd_alloc (abfd, 1);
337
338      /* Change BFD's target temporarily.  */
339      abfd->xvec = *target;
340
341      if (bfd_seek (abfd, (file_ptr) 0, SEEK_SET) != 0)
342	goto err_ret;
343
344      cleanup = BFD_SEND_FMT (abfd, _bfd_check_format, (abfd));
345      if (cleanup)
346	{
347	  int match_priority = abfd->xvec->match_priority;
348#if BFD_SUPPORTS_PLUGINS
349	  /* If this object can be handled by a plugin, give that the
350	     lowest priority; objects both handled by a plugin and
351	     with an underlying object format will be claimed
352	     separately by the plugin.  */
353	  if (*target == &plugin_vec)
354	    match_priority = (*target)->match_priority;
355#endif
356
357	  if (abfd->format != bfd_archive
358	      || (bfd_has_map (abfd)
359		  && bfd_get_error () != bfd_error_wrong_object_format))
360	    {
361	      /* If this is the default target, accept it, even if
362		 other targets might match.  People who want those
363		 other targets have to set the GNUTARGET variable.  */
364	      if (abfd->xvec == bfd_default_vector[0])
365		goto ok_ret;
366
367	      if (matching_vector)
368		matching_vector[match_count] = abfd->xvec;
369	      match_count++;
370
371	      if (match_priority < best_match)
372		{
373		  best_match = match_priority;
374		  best_count = 0;
375		}
376	      if (match_priority <= best_match)
377		{
378		  /* This format checks out as ok!  */
379		  right_targ = abfd->xvec;
380		  best_count++;
381		}
382	    }
383	  else
384	    {
385	      /* An archive with no armap or objects of the wrong
386		 type.  We want this target to match if we get no
387		 better matches.  */
388	      if (ar_right_targ != bfd_default_vector[0])
389		ar_right_targ = *target;
390	      if (matching_vector)
391		matching_vector[ar_match_index] = *target;
392	      ar_match_index++;
393	    }
394
395	  if (preserve_match.marker == NULL)
396	    {
397	      match_targ = abfd->xvec;
398	      if (!bfd_preserve_save (abfd, &preserve_match, cleanup))
399		goto err_ret;
400	      cleanup = NULL;
401	    }
402	}
403    }
404
405  if (best_count == 1)
406    match_count = 1;
407
408  if (match_count == 0)
409    {
410      /* Try partial matches.  */
411      right_targ = ar_right_targ;
412
413      if (right_targ == bfd_default_vector[0])
414	{
415	  match_count = 1;
416	}
417      else
418	{
419	  match_count = ar_match_index - _bfd_target_vector_entries;
420
421	  if (matching_vector && match_count > 1)
422	    memcpy (matching_vector,
423		    matching_vector + _bfd_target_vector_entries,
424		    sizeof (*matching_vector) * match_count);
425	}
426    }
427
428  /* We have more than one equally good match.  If any of the best
429     matches is a target in config.bfd targ_defvec or targ_selvecs,
430     choose it.  */
431  if (match_count > 1)
432    {
433      const bfd_target * const *assoc = bfd_associated_vector;
434
435      while ((right_targ = *assoc++) != NULL)
436	{
437	  int i = match_count;
438
439	  while (--i >= 0)
440	    if (matching_vector[i] == right_targ
441		&& right_targ->match_priority <= best_match)
442	      break;
443
444	  if (i >= 0)
445	    {
446	      match_count = 1;
447	      break;
448	    }
449	}
450    }
451
452  /* We still have more than one equally good match, and at least some
453     of the targets support match priority.  Choose the first of the
454     best matches.  */
455  if (matching_vector && match_count > 1 && best_count != match_count)
456    {
457      int i;
458
459      for (i = 0; i < match_count; i++)
460	{
461	  right_targ = matching_vector[i];
462	  if (right_targ->match_priority <= best_match)
463	    break;
464	}
465      match_count = 1;
466    }
467
468  /* There is way too much undoing of half-known state here.  We
469     really shouldn't iterate on live bfd's.  Note that saving the
470     whole bfd and restoring it would be even worse; the first thing
471     you notice is that the cached bfd file position gets out of sync.  */
472  if (preserve_match.marker != NULL)
473    cleanup = bfd_preserve_restore (abfd, &preserve_match);
474
475  if (match_count == 1)
476    {
477      abfd->xvec = right_targ;
478      /* If we come out of the loop knowing that the last target that
479	 matched is the one we want, then ABFD should still be in a usable
480	 state (except possibly for XVEC).  This is not just an
481	 optimisation.  In the case of plugins a match against the
482	 plugin target can result in the bfd being changed such that
483	 it no longer matches the plugin target, nor will it match
484	 RIGHT_TARG again.  */
485      if (match_targ != right_targ)
486	{
487	  bfd_reinit (abfd, initial_section_id, cleanup);
488	  bfd_release (abfd, preserve.marker);
489	  if (bfd_seek (abfd, (file_ptr) 0, SEEK_SET) != 0)
490	    goto err_ret;
491	  cleanup = BFD_SEND_FMT (abfd, _bfd_check_format, (abfd));
492	  BFD_ASSERT (cleanup != NULL);
493	}
494
495    ok_ret:
496      /* If the file was opened for update, then `output_has_begun'
497	 some time ago when the file was created.  Do not recompute
498	 sections sizes or alignments in _bfd_set_section_contents.
499	 We can not set this flag until after checking the format,
500	 because it will interfere with creation of BFD sections.  */
501      if (abfd->direction == both_direction)
502	abfd->output_has_begun = true;
503
504      free (matching_vector);
505      if (preserve_match.marker != NULL)
506	bfd_preserve_finish (abfd, &preserve_match);
507      bfd_preserve_finish (abfd, &preserve);
508
509      /* File position has moved, BTW.  */
510      return true;
511    }
512
513  if (match_count == 0)
514    {
515    err_unrecog:
516      bfd_set_error (bfd_error_file_not_recognized);
517    err_ret:
518      if (cleanup)
519	cleanup (abfd);
520      abfd->xvec = save_targ;
521      abfd->format = bfd_unknown;
522      free (matching_vector);
523      if (preserve_match.marker != NULL)
524	bfd_preserve_finish (abfd, &preserve_match);
525      bfd_preserve_restore (abfd, &preserve);
526      return false;
527    }
528
529  /* Restore original target type and format.  */
530  abfd->xvec = save_targ;
531  abfd->format = bfd_unknown;
532  bfd_set_error (bfd_error_file_ambiguously_recognized);
533
534  if (matching)
535    {
536      *matching = (char **) matching_vector;
537      matching_vector[match_count] = NULL;
538      /* Return target names.  This is a little nasty.  Maybe we
539	 should do another bfd_malloc?  */
540      while (--match_count >= 0)
541	{
542	  const char *name = matching_vector[match_count]->name;
543	  *(const char **) &matching_vector[match_count] = name;
544	}
545    }
546  else
547    free (matching_vector);
548  if (cleanup)
549    cleanup (abfd);
550  if (preserve_match.marker != NULL)
551    bfd_preserve_finish (abfd, &preserve_match);
552  bfd_preserve_restore (abfd, &preserve);
553  return false;
554}
555
556/*
557FUNCTION
558	bfd_set_format
559
560SYNOPSIS
561	bool bfd_set_format (bfd *abfd, bfd_format format);
562
563DESCRIPTION
564	This function sets the file format of the BFD @var{abfd} to the
565	format @var{format}. If the target set in the BFD does not
566	support the format requested, the format is invalid, or the BFD
567	is not open for writing, then an error occurs.
568*/
569
570bool
571bfd_set_format (bfd *abfd, bfd_format format)
572{
573  if (bfd_read_p (abfd)
574      || (unsigned int) abfd->format >= (unsigned int) bfd_type_end)
575    {
576      bfd_set_error (bfd_error_invalid_operation);
577      return false;
578    }
579
580  if (abfd->format != bfd_unknown)
581    return abfd->format == format;
582
583  /* Presume the answer is yes.  */
584  abfd->format = format;
585
586  if (!BFD_SEND_FMT (abfd, _bfd_set_format, (abfd)))
587    {
588      abfd->format = bfd_unknown;
589      return false;
590    }
591
592  return true;
593}
594
595/*
596FUNCTION
597	bfd_format_string
598
599SYNOPSIS
600	const char *bfd_format_string (bfd_format format);
601
602DESCRIPTION
603	Return a pointer to a const string
604	<<invalid>>, <<object>>, <<archive>>, <<core>>, or <<unknown>>,
605	depending upon the value of @var{format}.
606*/
607
608const char *
609bfd_format_string (bfd_format format)
610{
611  if (((int) format < (int) bfd_unknown)
612      || ((int) format >= (int) bfd_type_end))
613    return "invalid";
614
615  switch (format)
616    {
617    case bfd_object:
618      return "object";		/* Linker/assembler/compiler output.  */
619    case bfd_archive:
620      return "archive";		/* Object archive file.  */
621    case bfd_core:
622      return "core";		/* Core dump.  */
623    default:
624      return "unknown";
625    }
626}
627