1/* strings -- print the strings of printable characters in files
2   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18   02110-1301, USA.  */
19
20/* Usage: strings [options] file...
21
22   Options:
23   --all
24   -a
   -		Scan the whole file, not only the initialized data sections of
		object files.
26
27   --print-file-name
28   -f		Print the name of the file before each string.
29
30   --bytes=min-len
31   -n min-len
   -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
		that are terminated by a non-graphic character or by the
		end of the input.  Default is 4.
34
35   --radix={o,x,d}
36   -t {o,x,d}	Print the offset within the file before each string,
37		in octal/hex/decimal.
38
39   -o		Like -to.  (Some other implementations have -o like -to,
40		others like -td.  We chose one arbitrarily.)
41
42   --encoding={s,S,b,l,B,L}
43   -e {s,S,b,l,B,L}
44		Select character encoding: 7-bit-character, 8-bit-character,
45		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46		littleendian 32-bit.
47
48   --target=BFDNAME
49   -T {bfdname}
50		Specify a non-default object file format.
51
52   --help
53   -h		Print the usage message on the standard output.
54
55   --version
56   -v		Print the program version number.
57
58   Written by Richard Stallman <rms@gnu.ai.mit.edu>
59   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
60
#include "sysdep.h"
#include "bfd.h"
#include "getopt.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include <limits.h>
#include <sys/stat.h>
#include "bucomm.h"
68
/* Some platforms need to put stdin into binary mode, to read
    binary files.

   This is only attempted when HAVE_SETMODE is defined.  If the system
   headers spell the flag _O_BINARY, map O_BINARY and setmode onto the
   underscore-prefixed names; if neither spelling exists, O_BINARY is
   defined as 0 and SET_BINARY is left undefined, so callers must test
   for it with #ifdef.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
/* Switch file descriptor F to binary mode unless it is a terminal.  */
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif
85
/* Nonzero if C counts as part of a printable string: it must lie in
   the range 0..255 and be a tab, a character accepted by ISPRINT, or,
   only when the file-scope `encoding' is 'S', any value above 127.
   C is evaluated more than once, so it must have no side effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))
90
/* Declare errno ourselves when the headers did not provide it as a
   macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.
   A section must have all three flags set to be scanned.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
97
/* File offsets and the call used to open a file: the 64-bit variants
   (off64_t, fopen64) when HAVE_FOPEN64 is defined, otherwise the
   plain ones.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* Likewise for stat: struct stat64/stat64 when HAVE_STAT64 is defined,
   otherwise struct stat/stat.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
112
/* Radix for printing addresses (must be 8, 10 or 16).  Only meaningful
   when `print_addresses' is TRUE; set by the -o and -t options.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main initializes this to -1, meaning "not given", and replaces that
   with the default of 4 once the options have been parsed.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  NULL unless -T/--target was given.  */
static char *target;

/* The character encoding format: one of 's', 'S', 'b', 'l', 'B', 'L'.  */
static char encoding;
/* Number of bytes that make up one character in `encoding' (1, 2 or 4);
   derived from `encoding' in main.  */
static int encoding_bytes;
137
/* Long option table passed to getopt_long.  Every entry maps to the
   short option letter that main's option switch already handles.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
150
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.
   A pointer to one of these is handed to strings_a_section through
   bfd_map_over_sections.  */

typedef struct
{
  /* Name used when printing strings found in the file.  */
  const char *  filename;
  /* Size of the file in bytes; 0 means "not determined yet".  */
  bfd_size_type filesize;
} filename_and_size_t;
159
/* Forward declarations for the functions defined below.  */
static void strings_a_section (bfd *, asection *, void *);
static bfd_boolean strings_object_file (const char *);
static bfd_boolean strings_file (char *file);
static int integer_arg (char *s);
static void print_strings (const char *, FILE *, file_off, int, int, char *);
static void usage (FILE *, int);
static long get_char (FILE *, file_off *, int *, char **);

int main (int, char **);
169
170int
171main (int argc, char **argv)
172{
173  int optc;
174  int exit_status = 0;
175  bfd_boolean files_given = FALSE;
176
177#if defined (HAVE_SETLOCALE)
178  setlocale (LC_ALL, "");
179#endif
180  bindtextdomain (PACKAGE, LOCALEDIR);
181  textdomain (PACKAGE);
182
183  program_name = argv[0];
184  xmalloc_set_program_name (program_name);
185
186  expandargv (&argc, &argv);
187
188  string_min = -1;
189  print_addresses = FALSE;
190  print_filenames = FALSE;
191  datasection_only = TRUE;
192  target = NULL;
193  encoding = 's';
194
195  while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
196			      long_options, (int *) 0)) != EOF)
197    {
198      switch (optc)
199	{
200	case 'a':
201	  datasection_only = FALSE;
202	  break;
203
204	case 'f':
205	  print_filenames = TRUE;
206	  break;
207
208	case 'H':
209	case 'h':
210	  usage (stdout, 0);
211
212	case 'n':
213	  string_min = integer_arg (optarg);
214	  if (string_min < 1)
215	    fatal (_("invalid number %s"), optarg);
216	  break;
217
218	case 'o':
219	  print_addresses = TRUE;
220	  address_radix = 8;
221	  break;
222
223	case 't':
224	  print_addresses = TRUE;
225	  if (optarg[1] != '\0')
226	    usage (stderr, 1);
227	  switch (optarg[0])
228	    {
229	    case 'o':
230	      address_radix = 8;
231	      break;
232
233	    case 'd':
234	      address_radix = 10;
235	      break;
236
237	    case 'x':
238	      address_radix = 16;
239	      break;
240
241	    default:
242	      usage (stderr, 1);
243	    }
244	  break;
245
246	case 'T':
247	  target = optarg;
248	  break;
249
250	case 'e':
251	  if (optarg[1] != '\0')
252	    usage (stderr, 1);
253	  encoding = optarg[0];
254	  break;
255
256	case 'V':
257	case 'v':
258	  print_version ("strings");
259	  break;
260
261	case '?':
262	  usage (stderr, 1);
263
264	default:
265	  if (string_min < 0)
266	    string_min = optc - '0';
267	  else
268	    string_min = string_min * 10 + optc - '0';
269	  break;
270	}
271    }
272
273  if (string_min < 0)
274    string_min = 4;
275
276  switch (encoding)
277    {
278    case 'S':
279    case 's':
280      encoding_bytes = 1;
281      break;
282    case 'b':
283    case 'l':
284      encoding_bytes = 2;
285      break;
286    case 'B':
287    case 'L':
288      encoding_bytes = 4;
289      break;
290    default:
291      usage (stderr, 1);
292    }
293
294  bfd_init ();
295  set_default_bfd_target ();
296
297  if (optind >= argc)
298    {
299      datasection_only = FALSE;
300#ifdef SET_BINARY
301      SET_BINARY (fileno (stdin));
302#endif
303      print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
304      files_given = TRUE;
305    }
306  else
307    {
308      for (; optind < argc; ++optind)
309	{
310	  if (strcmp (argv[optind], "-") == 0)
311	    datasection_only = FALSE;
312	  else
313	    {
314	      files_given = TRUE;
315	      exit_status |= strings_file (argv[optind]) == FALSE;
316	    }
317	}
318    }
319
320  if (!files_given)
321    usage (stderr, 1);
322
323  return (exit_status);
324}
325
326/* Scan section SECT of the file ABFD, whose printable name is in
327   ARG->filename and whose size might be in ARG->filesize.  If it
328   contains initialized data set `got_a_section' and print the
329   strings in it.
330
331   FIXME: We ought to be able to return error codes/messages for
332   certain conditions.  */
333
334static void
335strings_a_section (bfd *abfd, asection *sect, void *arg)
336{
337  filename_and_size_t * filename_and_sizep;
338  bfd_size_type *filesizep;
339  bfd_size_type sectsize;
340  void *mem;
341
342  if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
343    return;
344
345  sectsize = bfd_get_section_size (sect);
346
347  if (sectsize <= 0)
348    return;
349
350  /* Get the size of the file.  This might have been cached for us.  */
351  filename_and_sizep = (filename_and_size_t *) arg;
352  filesizep = & filename_and_sizep->filesize;
353
354  if (*filesizep == 0)
355    {
356      struct stat st;
357
358      if (bfd_stat (abfd, &st))
359	return;
360
361      /* Cache the result so that we do not repeatedly stat this file.  */
362      *filesizep = st.st_size;
363    }
364
365  /* Compare the size of the section against the size of the file.
366     If the section is bigger then the file must be corrupt and
367     we should not try dumping it.  */
368  if (sectsize >= *filesizep)
369    return;
370
371  mem = xmalloc (sectsize);
372
373  if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
374    {
375      got_a_section = TRUE;
376
377      print_strings (filename_and_sizep->filename, NULL, sect->filepos,
378		     0, sectsize, mem);
379    }
380
381  free (mem);
382}
383
384/* Scan all of the sections in FILE, and print the strings
385   in the initialized data section(s).
386
387   Return TRUE if successful,
388   FALSE if not (such as if FILE is not an object file).  */
389
390static bfd_boolean
391strings_object_file (const char *file)
392{
393  filename_and_size_t filename_and_size;
394  bfd *abfd;
395
396  abfd = bfd_openr (file, target);
397
398  if (abfd == NULL)
399    /* Treat the file as a non-object file.  */
400    return FALSE;
401
402  /* This call is mainly for its side effect of reading in the sections.
403     We follow the traditional behavior of `strings' in that we don't
404     complain if we don't recognize a file to be an object file.  */
405  if (!bfd_check_format (abfd, bfd_object))
406    {
407      bfd_close (abfd);
408      return FALSE;
409    }
410
411  got_a_section = FALSE;
412  filename_and_size.filename = file;
413  filename_and_size.filesize = 0;
414  bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
415
416  if (!bfd_close (abfd))
417    {
418      bfd_nonfatal (file);
419      return FALSE;
420    }
421
422  return got_a_section;
423}
424
425/* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
426
427static bfd_boolean
428strings_file (char *file)
429{
430  statbuf st;
431
432  if (file_stat (file, &st) < 0)
433    {
434      if (errno == ENOENT)
435	non_fatal (_("'%s': No such file"), file);
436      else
437	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
438		   file, strerror (errno));
439      return FALSE;
440    }
441
442  /* If we weren't told to scan the whole file,
443     try to open it as an object file and only look at
444     initialized data sections.  If that fails, fall back to the
445     whole file.  */
446  if (!datasection_only || !strings_object_file (file))
447    {
448      FILE *stream;
449
450      stream = file_open (file, FOPEN_RB);
451      if (stream == NULL)
452	{
453	  fprintf (stderr, "%s: ", program_name);
454	  perror (file);
455	  return FALSE;
456	}
457
458      print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
459
460      if (fclose (stream) == EOF)
461	{
462	  fprintf (stderr, "%s: ", program_name);
463	  perror (file);
464	  return FALSE;
465	}
466    }
467
468  return TRUE;
469}
470
471/* Read the next character, return EOF if none available.
472   Assume that STREAM is positioned so that the next byte read
473   is at address ADDRESS in the file.
474
475   If STREAM is NULL, do not read from it.
476   The caller can supply a buffer of characters
477   to be processed before the data in STREAM.
478   MAGIC is the address of the buffer and
479   MAGICCOUNT is how many characters are in it.  */
480
481static long
482get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
483{
484  int c, i;
485  long r = EOF;
486  unsigned char buf[4];
487
488  for (i = 0; i < encoding_bytes; i++)
489    {
490      if (*magiccount)
491	{
492	  (*magiccount)--;
493	  c = *(*magic)++;
494	}
495      else
496	{
497	  if (stream == NULL)
498	    return EOF;
499
500	  /* Only use getc_unlocked if we found a declaration for it.
501	     Otherwise, libc is not thread safe by default, and we
502	     should not use it.  */
503
504#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
505	  c = getc_unlocked (stream);
506#else
507	  c = getc (stream);
508#endif
509	  if (c == EOF)
510	    return EOF;
511	}
512
513      (*address)++;
514      buf[i] = c;
515    }
516
517  switch (encoding)
518    {
519    case 'S':
520    case 's':
521      r = buf[0];
522      break;
523    case 'b':
524      r = (buf[0] << 8) | buf[1];
525      break;
526    case 'l':
527      r = buf[0] | (buf[1] << 8);
528      break;
529    case 'B':
530      r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
531	((long) buf[2] << 8) | buf[3];
532      break;
533    case 'L':
534      r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
535	((long) buf[3] << 24);
536      break;
537    }
538
539  if (r == EOF)
540    return 0;
541
542  return r;
543}
544
545/* Find the strings in file FILENAME, read from STREAM.
546   Assume that STREAM is positioned so that the next byte read
547   is at address ADDRESS in the file.
548   Stop reading at address STOP_POINT in the file, if nonzero.
549
550   If STREAM is NULL, do not read from it.
551   The caller can supply a buffer of characters
552   to be processed before the data in STREAM.
553   MAGIC is the address of the buffer and
554   MAGICCOUNT is how many characters are in it.
555   Those characters come at address ADDRESS and the data in STREAM follow.  */
556
557static void
558print_strings (const char *filename, FILE *stream, file_off address,
559	       int stop_point, int magiccount, char *magic)
560{
561  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
562
563  while (1)
564    {
565      file_off start;
566      int i;
567      long c;
568
569      /* See if the next `string_min' chars are all graphic chars.  */
570    tryline:
571      if (stop_point && address >= stop_point)
572	break;
573      start = address;
574      for (i = 0; i < string_min; i++)
575	{
576	  c = get_char (stream, &address, &magiccount, &magic);
577	  if (c == EOF)
578	    return;
579	  if (! STRING_ISGRAPHIC (c))
580	    /* Found a non-graphic.  Try again starting with next char.  */
581	    goto tryline;
582	  buf[i] = c;
583	}
584
585      /* We found a run of `string_min' graphic characters.  Print up
586	 to the next non-graphic character.  */
587
588      if (print_filenames)
589	printf ("%s: ", filename);
590      if (print_addresses)
591	switch (address_radix)
592	  {
593	  case 8:
594#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
595	    if (sizeof (start) > sizeof (long))
596	      printf ("%7llo ", (unsigned long long) start);
597	    else
598#else
599# if !BFD_HOST_64BIT_LONG
600	    if (start != (unsigned long) start)
601	      printf ("++%7lo ", (unsigned long) start);
602	    else
603# endif
604#endif
605	      printf ("%7lo ", (unsigned long) start);
606	    break;
607
608	  case 10:
609#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
610	    if (sizeof (start) > sizeof (long))
611	      printf ("%7lld ", (unsigned long long) start);
612	    else
613#else
614# if !BFD_HOST_64BIT_LONG
615	    if (start != (unsigned long) start)
616	      printf ("++%7ld ", (unsigned long) start);
617	    else
618# endif
619#endif
620	      printf ("%7ld ", (long) start);
621	    break;
622
623	  case 16:
624#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
625	    if (sizeof (start) > sizeof (long))
626	      printf ("%7llx ", (unsigned long long) start);
627	    else
628#else
629# if !BFD_HOST_64BIT_LONG
630	    if (start != (unsigned long) start)
631	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
632		      (unsigned long) (start & 0xffffffff));
633	    else
634# endif
635#endif
636	      printf ("%7lx ", (unsigned long) start);
637	    break;
638	  }
639
640      buf[i] = '\0';
641      fputs (buf, stdout);
642
643      while (1)
644	{
645	  c = get_char (stream, &address, &magiccount, &magic);
646	  if (c == EOF)
647	    break;
648	  if (! STRING_ISGRAPHIC (c))
649	    break;
650	  putchar (c);
651	}
652
653      putchar ('\n');
654    }
655}
656
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal and hex numbers as in C: a leading "0"
   selects octal and a leading "0x" or "0X" selects hex.

   A decimal or octal number may be followed by a single `b', which
   multiplies the value by 512, or `B', which multiplies it by 1024.
   In hex both letters are digits, so no suffix is possible there.

   Return the value.  If S contains anything else, including a digit
   that is not valid in the selected radix, or if the value does not
   fit in an int, report the argument through fatal ().  */

static int
integer_arg (char *s)
{
  int value = 0;
  int radix = 10;
  int multiplier = 1;
  char *p = s;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
        {
          radix = 16;
          p++;
        }
      else
        radix = 8;
    }

  for (;; p++)
    {
      int c = (unsigned char) *p;
      int digit;

      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;
      else
        break;

      /* Not a digit in this radix: 8 and 9 in octal, or any letter
         outside hex.  Leave it for the suffix/garbage checks below.  */
      if (digit >= radix)
        break;

      /* Refuse to overflow rather than wrap around silently.  */
      if (value > (INT_MAX - digit) / radix)
        fatal (_("invalid integer argument %s"), s);

      value = value * radix + digit;
    }

  if (*p == 'b')
    {
      multiplier = 512;
      p++;
    }
  else if (*p == 'B')
    {
      multiplier = 1024;
      p++;
    }

  /* Anything left over is garbage, and the scaled value must still
     fit in an int.  */
  if (*p != '\0' || value > INT_MAX / multiplier)
    fatal (_("invalid integer argument %s"), s);

  return value * multiplier;
}
701
702static void
703usage (FILE *stream, int status)
704{
705  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
706  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
707  fprintf (stream, _(" The options are:\n\
708  -a - --all                Scan the entire file, not just the data section\n\
709  -f --print-file-name      Print the name of the file before each string\n\
710  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
711  -<number>                 least [number] characters (default 4).\n\
712  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
713  -o                        An alias for --radix=o\n\
714  -T --target=<BFDNAME>     Specify the binary file format\n\
715  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
716                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
717  @<file>                   Read options from <file>\n\
718  -h --help                 Display this information\n\
719  -v --version              Print the program's version number\n"));
720  list_supported_targets (program_name, stream);
721  if (REPORT_BUGS_TO[0] && status == 0)
722    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
723  exit (status);
724}
725