1/* strings -- print the strings of printable characters in files
2   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18   02110-1301, USA.  */
19
20/* Usage: strings [options] file...
21
22   Options:
23   --all
24   -a
   -		Scan the whole file, not only the initialized data sections of object files.
26
27   --print-file-name
28   -f		Print the name of the file before each string.
29
30   --bytes=min-len
31   -n min-len
32   -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
33		that are followed by a NUL or a newline.  Default is 4.
34
35   --radix={o,x,d}
36   -t {o,x,d}	Print the offset within the file before each string,
37		in octal/hex/decimal.
38
39   -o		Like -to.  (Some other implementations have -o like -to,
40		others like -td.  We chose one arbitrarily.)
41
42   --encoding={s,S,b,l,B,L}
43   -e {s,S,b,l,B,L}
44		Select character encoding: 7-bit-character, 8-bit-character,
45		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46		littleendian 32-bit.
47
48   --target=BFDNAME
49   -T {bfdname}
50		Specify a non-default object file format.
51
52   --help
53   -h		Print the usage message on the standard output.
54
55   --version
56   -v		Print the program version number.
57
58   Written by Richard Stallman <rms@gnu.ai.mit.edu>
59   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
60
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#include "bfd.h"
65#include <stdio.h>
66#include "getopt.h"
67#include <errno.h>
68#include "bucomm.h"
69#include "libiberty.h"
70#include "safe-ctype.h"
71#include <sys/stat.h>
72
/* Some platforms need to put stdin into binary mode, to read
    binary files.

    When HAVE_SETMODE is defined and O_BINARY ends up as a nonzero
    flag, SET_BINARY (F) switches file descriptor F to binary mode
    unless F is a terminal.  In every other configuration SET_BINARY
    is left undefined, so users must guard calls with #ifdef.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
/* Only the underscore-prefixed spellings exist; map the plain names
   onto them.  */
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
/* No binary flag at all: make the `#if O_BINARY' test below fail.  */
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif
89
/* Nonzero if character value C counts as part of a printable string.
   C must lie in the range 0..255 and be either a tab, a character
   accepted by ISPRINT, or -- only when the 8-bit encoding 'S' is
   selected -- any value above 127.

   C is expanded several times, so it must not have side effects.
   The macro reads the file-scope variable `encoding'.  */
#define STRING_ISGRAPHIC(c) \
      (   !((c) < 0 || (c) > 255) \
       && ((encoding == 'S' && (c) > 127) || (c) == '\t' || ISPRINT (c)))
94
/* If <errno.h> did not provide errno as a macro, declare it as a
   plain external int so the uses below still compile.  */
#ifndef errno
extern int errno;
#endif
98
/* The BFD section flags that identify an initialized data section.
   strings_a_section skips any section that does not have all three
   of these flags set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
101
/* File offset type and fopen wrapper.  The 64-bit variants are used
   when configure found fopen64; otherwise the plain ones are used.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* Stat buffer type and stat wrapper, chosen the same way.  stat64 is
   deliberately not used on __APPLE__ even if HAVE_STAT64 is set.  */
#if defined(HAVE_STAT64) && ! defined(__APPLE__)
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
116
/* Radix for printing addresses (must be 8, 10 or 16).
   Only meaningful when print_addresses is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main sets this to -1 while parsing options to mean "not given",
   and replaces that with the default of 4 afterwards.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.
   Reset by strings_object_file, set by strings_a_section.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  Passed as the target argument of
   bfd_openr; NULL unless -T/--target was given.  */
static char *target;

/* The character encoding format: one of the letters accepted by
   -e/--encoding (s, S, b, l, B or L).  */
static char encoding;
/* Number of bytes that make up one character in the selected
   encoding (1, 2 or 4); derived from `encoding' in main.  */
static int encoding_bytes;
141
/* Long options understood by getopt_long.  Every entry returns the
   character of the equivalent short option, so main handles both
   spellings in the same switch case.  The all-zero entry terminates
   the table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
154
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.

   An instance is handed to strings_a_section through the opaque
   argument of bfd_map_over_sections.  */

typedef struct
{
  /* Printable name of the file being scanned.  */
  const char *  filename;
  /* Size of the file; 0 means "not determined yet".  */
  bfd_size_type filesize;
} filename_and_size_t;
163
/* Forward declarations of the functions defined in this file.  */
static void strings_a_section (bfd *, asection *, void *);
static bfd_boolean strings_object_file (const char *);
static bfd_boolean strings_file (char *file);
static int integer_arg (char *s);
static void print_strings (const char *, FILE *, file_off, int, int, char *);
static void usage (FILE *, int);
static long get_char (FILE *, file_off *, int *, char **);

int main (int, char **);
173
174int
175main (int argc, char **argv)
176{
177  int optc;
178  int exit_status = 0;
179  bfd_boolean files_given = FALSE;
180
181#if defined (HAVE_SETLOCALE)
182  setlocale (LC_ALL, "");
183#endif
184  bindtextdomain (PACKAGE, LOCALEDIR);
185  textdomain (PACKAGE);
186
187  program_name = argv[0];
188  xmalloc_set_program_name (program_name);
189
190  expandargv (&argc, &argv);
191
192  string_min = -1;
193  print_addresses = FALSE;
194  print_filenames = FALSE;
195  datasection_only = TRUE;
196  target = NULL;
197  encoding = 's';
198
199  while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
200			      long_options, (int *) 0)) != EOF)
201    {
202      switch (optc)
203	{
204	case 'a':
205	  datasection_only = FALSE;
206	  break;
207
208	case 'f':
209	  print_filenames = TRUE;
210	  break;
211
212	case 'H':
213	case 'h':
214	  usage (stdout, 0);
215
216	case 'n':
217	  string_min = integer_arg (optarg);
218	  if (string_min < 1)
219	    fatal (_("invalid number %s"), optarg);
220	  break;
221
222	case 'o':
223	  print_addresses = TRUE;
224	  address_radix = 8;
225	  break;
226
227	case 't':
228	  print_addresses = TRUE;
229	  if (optarg[1] != '\0')
230	    usage (stderr, 1);
231	  switch (optarg[0])
232	    {
233	    case 'o':
234	      address_radix = 8;
235	      break;
236
237	    case 'd':
238	      address_radix = 10;
239	      break;
240
241	    case 'x':
242	      address_radix = 16;
243	      break;
244
245	    default:
246	      usage (stderr, 1);
247	    }
248	  break;
249
250	case 'T':
251	  target = optarg;
252	  break;
253
254	case 'e':
255	  if (optarg[1] != '\0')
256	    usage (stderr, 1);
257	  encoding = optarg[0];
258	  break;
259
260	case 'V':
261	case 'v':
262	  print_version ("strings");
263	  break;
264
265	case '?':
266	  usage (stderr, 1);
267
268	default:
269	  if (string_min < 0)
270	    string_min = optc - '0';
271	  else
272	    string_min = string_min * 10 + optc - '0';
273	  break;
274	}
275    }
276
277  if (string_min < 0)
278    string_min = 4;
279
280  switch (encoding)
281    {
282    case 'S':
283    case 's':
284      encoding_bytes = 1;
285      break;
286    case 'b':
287    case 'l':
288      encoding_bytes = 2;
289      break;
290    case 'B':
291    case 'L':
292      encoding_bytes = 4;
293      break;
294    default:
295      usage (stderr, 1);
296    }
297
298  bfd_init ();
299  set_default_bfd_target ();
300
301  if (optind >= argc)
302    {
303      datasection_only = FALSE;
304#ifdef SET_BINARY
305      SET_BINARY (fileno (stdin));
306#endif
307      print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
308      files_given = TRUE;
309    }
310  else
311    {
312      for (; optind < argc; ++optind)
313	{
314	  if (strcmp (argv[optind], "-") == 0)
315	    datasection_only = FALSE;
316	  else
317	    {
318	      files_given = TRUE;
319	      exit_status |= strings_file (argv[optind]) == FALSE;
320	    }
321	}
322    }
323
324  if (!files_given)
325    usage (stderr, 1);
326
327  return (exit_status);
328}
329
330/* Scan section SECT of the file ABFD, whose printable name is in
331   ARG->filename and whose size might be in ARG->filesize.  If it
332   contains initialized data set `got_a_section' and print the
333   strings in it.
334
335   FIXME: We ought to be able to return error codes/messages for
336   certain conditions.  */
337
338static void
339strings_a_section (bfd *abfd, asection *sect, void *arg)
340{
341  filename_and_size_t * filename_and_sizep;
342  bfd_size_type *filesizep;
343  bfd_size_type sectsize;
344  void *mem;
345
346  if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
347    return;
348
349  sectsize = bfd_get_section_size (sect);
350
351  if (sectsize <= 0)
352    return;
353
354  /* Get the size of the file.  This might have been cached for us.  */
355  filename_and_sizep = (filename_and_size_t *) arg;
356  filesizep = & filename_and_sizep->filesize;
357
358  if (*filesizep == 0)
359    {
360      struct stat st;
361
362      if (bfd_stat (abfd, &st))
363	return;
364
365      /* Cache the result so that we do not repeatedly stat this file.  */
366      *filesizep = st.st_size;
367    }
368
369  /* Compare the size of the section against the size of the file.
370     If the section is bigger then the file must be corrupt and
371     we should not try dumping it.  */
372  if (sectsize >= *filesizep)
373    return;
374
375  mem = xmalloc (sectsize);
376
377  if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
378    {
379      got_a_section = TRUE;
380
381      print_strings (filename_and_sizep->filename, NULL, sect->filepos,
382		     0, sectsize, mem);
383    }
384
385  free (mem);
386}
387
388/* Scan all of the sections in FILE, and print the strings
389   in the initialized data section(s).
390
391   Return TRUE if successful,
392   FALSE if not (such as if FILE is not an object file).  */
393
394static bfd_boolean
395strings_object_file (const char *file)
396{
397  filename_and_size_t filename_and_size;
398  bfd *abfd;
399
400  abfd = bfd_openr (file, target);
401
402  if (abfd == NULL)
403    /* Treat the file as a non-object file.  */
404    return FALSE;
405
406  /* This call is mainly for its side effect of reading in the sections.
407     We follow the traditional behavior of `strings' in that we don't
408     complain if we don't recognize a file to be an object file.  */
409  if (!bfd_check_format (abfd, bfd_object))
410    {
411      bfd_close (abfd);
412      return FALSE;
413    }
414
415  got_a_section = FALSE;
416  filename_and_size.filename = file;
417  filename_and_size.filesize = 0;
418  bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
419
420  if (!bfd_close (abfd))
421    {
422      bfd_nonfatal (file);
423      return FALSE;
424    }
425
426  return got_a_section;
427}
428
429/* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
430
431static bfd_boolean
432strings_file (char *file)
433{
434  statbuf st;
435
436  if (file_stat (file, &st) < 0)
437    {
438      if (errno == ENOENT)
439	non_fatal (_("'%s': No such file"), file);
440      else
441	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
442		   file, strerror (errno));
443      return FALSE;
444    }
445
446  /* If we weren't told to scan the whole file,
447     try to open it as an object file and only look at
448     initialized data sections.  If that fails, fall back to the
449     whole file.  */
450  if (!datasection_only || !strings_object_file (file))
451    {
452      FILE *stream;
453
454      stream = file_open (file, FOPEN_RB);
455      if (stream == NULL)
456	{
457	  fprintf (stderr, "%s: ", program_name);
458	  perror (file);
459	  return FALSE;
460	}
461
462      print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
463
464      if (fclose (stream) == EOF)
465	{
466	  fprintf (stderr, "%s: ", program_name);
467	  perror (file);
468	  return FALSE;
469	}
470    }
471
472  return TRUE;
473}
474
475/* Read the next character, return EOF if none available.
476   Assume that STREAM is positioned so that the next byte read
477   is at address ADDRESS in the file.
478
479   If STREAM is NULL, do not read from it.
480   The caller can supply a buffer of characters
481   to be processed before the data in STREAM.
482   MAGIC is the address of the buffer and
483   MAGICCOUNT is how many characters are in it.  */
484
485static long
486get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
487{
488  int c, i;
489  long r = EOF;
490  unsigned char buf[4];
491
492  for (i = 0; i < encoding_bytes; i++)
493    {
494      if (*magiccount)
495	{
496	  (*magiccount)--;
497	  c = *(*magic)++;
498	}
499      else
500	{
501	  if (stream == NULL)
502	    return EOF;
503
504	  /* Only use getc_unlocked if we found a declaration for it.
505	     Otherwise, libc is not thread safe by default, and we
506	     should not use it.  */
507
508#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
509	  c = getc_unlocked (stream);
510#else
511	  c = getc (stream);
512#endif
513	  if (c == EOF)
514	    return EOF;
515	}
516
517      (*address)++;
518      buf[i] = c;
519    }
520
521  switch (encoding)
522    {
523    case 'S':
524    case 's':
525      r = buf[0];
526      break;
527    case 'b':
528      r = (buf[0] << 8) | buf[1];
529      break;
530    case 'l':
531      r = buf[0] | (buf[1] << 8);
532      break;
533    case 'B':
534      r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
535	((long) buf[2] << 8) | buf[3];
536      break;
537    case 'L':
538      r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
539	((long) buf[3] << 24);
540      break;
541    }
542
543  if (r == EOF)
544    return 0;
545
546  return r;
547}
548
549/* Find the strings in file FILENAME, read from STREAM.
550   Assume that STREAM is positioned so that the next byte read
551   is at address ADDRESS in the file.
552   Stop reading at address STOP_POINT in the file, if nonzero.
553
554   If STREAM is NULL, do not read from it.
555   The caller can supply a buffer of characters
556   to be processed before the data in STREAM.
557   MAGIC is the address of the buffer and
558   MAGICCOUNT is how many characters are in it.
559   Those characters come at address ADDRESS and the data in STREAM follow.  */
560
561static void
562print_strings (const char *filename, FILE *stream, file_off address,
563	       int stop_point, int magiccount, char *magic)
564{
565  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
566
567  while (1)
568    {
569      file_off start;
570      int i;
571      long c;
572
573      /* See if the next `string_min' chars are all graphic chars.  */
574    tryline:
575      if (stop_point && address >= stop_point)
576	break;
577      start = address;
578      for (i = 0; i < string_min; i++)
579	{
580	  c = get_char (stream, &address, &magiccount, &magic);
581	  if (c == EOF)
582	    return;
583	  if (! STRING_ISGRAPHIC (c))
584	    /* Found a non-graphic.  Try again starting with next char.  */
585	    goto tryline;
586	  buf[i] = c;
587	}
588
589      /* We found a run of `string_min' graphic characters.  Print up
590	 to the next non-graphic character.  */
591
592      if (print_filenames)
593	printf ("%s: ", filename);
594      if (print_addresses)
595	switch (address_radix)
596	  {
597	  case 8:
598#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
599	    if (sizeof (start) > sizeof (long))
600	      printf ("%7llo ", (unsigned long long) start);
601	    else
602#else
603# if !BFD_HOST_64BIT_LONG
604	    if (start != (unsigned long) start)
605	      printf ("++%7lo ", (unsigned long) start);
606	    else
607# endif
608#endif
609	      printf ("%7lo ", (unsigned long) start);
610	    break;
611
612	  case 10:
613#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
614	    if (sizeof (start) > sizeof (long))
615	      printf ("%7lld ", (unsigned long long) start);
616	    else
617#else
618# if !BFD_HOST_64BIT_LONG
619	    if (start != (unsigned long) start)
620	      printf ("++%7ld ", (unsigned long) start);
621	    else
622# endif
623#endif
624	      printf ("%7ld ", (long) start);
625	    break;
626
627	  case 16:
628#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
629	    if (sizeof (start) > sizeof (long))
630	      printf ("%7llx ", (unsigned long long) start);
631	    else
632#else
633# if !BFD_HOST_64BIT_LONG
634	    if (start != (unsigned long) start)
635	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
636		      (unsigned long) (start & 0xffffffff));
637	    else
638# endif
639#endif
640	      printf ("%7lx ", (unsigned long) start);
641	    break;
642	  }
643
644      buf[i] = '\0';
645      fputs (buf, stdout);
646
647      while (1)
648	{
649	  c = get_char (stream, &address, &magiccount, &magic);
650	  if (c == EOF)
651	    break;
652	  if (! STRING_ISGRAPHIC (c))
653	    break;
654	  putchar (c);
655	}
656
657      putchar ('\n');
658    }
659}
660
/* Parse string S as a non-negative integer and return its value.
   The radix is decimal by default; as in C, a leading "0x" or "0X"
   selects hexadecimal and any other leading "0" selects octal.

   In decimal and octal a trailing `b' multiplies the value by 512
   and a trailing `B' multiplies it by 1024.  (In hexadecimal those
   letters are ordinary digits.)

   Anything else -- a digit that is not valid in the radix, trailing
   characters, or a value that does not fit in an int -- is reported
   through fatal, which is expected not to return.  */

static int
integer_arg (char *s)
{
  /* Largest int, computed locally so that no extra header is needed
     (assumes int has no padding bits).  */
  const int int_max = (int) (~0U >> 1);
  int value = 0;
  int radix = 10;
  int multiplier = 1;
  char *p = s;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
	{
	  radix = 16;
	  p++;
	}
      else
	radix = 8;
    }

  for (;; p++)
    {
      int c = *p;
      int digit;

      if (c >= '0' && c <= '9')
	digit = c - '0';
      else if (radix == 16 && c >= 'a' && c <= 'f')
	digit = c - 'a' + 10;
      else if (radix == 16 && c >= 'A' && c <= 'F')
	digit = c - 'A' + 10;
      else
	break;

      /* Reject 8 and 9 in octal numbers, and refuse to overflow.  */
      if (digit >= radix || value > (int_max - digit) / radix)
	fatal (_("invalid integer argument %s"), s);

      value = value * radix + digit;
    }

  if (*p == 'b')
    {
      multiplier = 512;
      p++;
    }
  else if (*p == 'B')
    {
      multiplier = 1024;
      p++;
    }

  /* Nothing may follow the number and its optional suffix.  */
  if (*p != '\0' || value > int_max / multiplier)
    fatal (_("invalid integer argument %s"), s);

  return value * multiplier;
}
705
706static void
707usage (FILE *stream, int status)
708{
709  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
710  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
711  fprintf (stream, _(" The options are:\n\
712  -a - --all                Scan the entire file, not just the data section\n\
713  -f --print-file-name      Print the name of the file before each string\n\
714  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
715  -<number>                 least [number] characters (default 4).\n\
716  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
717  -o                        An alias for --radix=o\n\
718  -T --target=<BFDNAME>     Specify the binary file format\n\
719  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
720                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
721  @<file>                   Read options from <file>\n\
722  -h --help                 Display this information\n\
723  -v --version              Print the program's version number\n"));
724  list_supported_targets (program_name, stream);
725  if (status == 0)
726    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
727  exit (status);
728}
729