1/* strings -- print the strings of printable characters in files
2   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18   02110-1301, USA.  */
19
20/* Usage: strings [options] file...
21
22   Options:
23   --all
24   -a
25   -		Do not scan only the initialized data section of object files.
26
27   --print-file-name
28   -f		Print the name of the file before each string.
29
30   --bytes=min-len
31   -n min-len
32   -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
33		that are followed by a NUL or a newline.  Default is 4.
34
35   --radix={o,x,d}
36   -t {o,x,d}	Print the offset within the file before each string,
37		in octal/hex/decimal.
38
39   -o		Like -to.  (Some other implementations have -o like -to,
40		others like -td.  We chose one arbitrarily.)
41
42   --encoding={s,S,b,l,B,L}
43   -e {s,S,b,l,B,L}
44		Select character encoding: 7-bit-character, 8-bit-character,
45		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46		littleendian 32-bit.
47
48   --target=BFDNAME
49		Specify a non-default object file format.
50
51   --help
52   -h		Print the usage message on the standard output.
53
54   --version
55   -v		Print the program version number.
56
57   Written by Richard Stallman <rms@gnu.ai.mit.edu>
58   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
59
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "bfd.h"
#include <stdio.h>
#include "getopt.h"
#include <errno.h>
#include <limits.h>
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include <sys/stat.h>
71
72/* Some platforms need to put stdin into binary mode, to read
73    binary files.  */
74#ifdef HAVE_SETMODE
75#ifndef O_BINARY
76#ifdef _O_BINARY
77#define O_BINARY _O_BINARY
78#define setmode _setmode
79#else
80#define O_BINARY 0
81#endif
82#endif
83#if O_BINARY
84#include <io.h>
85#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
86#endif
87#endif
88
/* Nonzero if C is a character value that may be part of a printed
   string: it must lie in the range 0..255 and be either a tab, a
   character accepted by ISPRINT, or -- only when the file-scope
   `encoding' is 'S' -- any value above 127.
   C is evaluated several times, so it must not have side effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))
93
94#ifndef errno
95extern int errno;
96#endif
97
98/* The BFD section flags that identify an initialized data section.  */
99#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
100
/* File offset type and open wrapper: use off64_t/fopen64 when
   HAVE_FOPEN64 is defined, otherwise plain off_t/fopen.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* Stat buffer type and stat wrapper: use struct stat64/stat64 when
   HAVE_STAT64 is defined, otherwise plain struct stat/stat.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
115
116/* Radix for printing addresses (must be 8, 10 or 16).  */
117static int address_radix;
118
119/* Minimum length of sequence of graphic chars to trigger output.  */
120static int string_min;
121
122/* TRUE means print address within file for each string.  */
123static bfd_boolean print_addresses;
124
125/* TRUE means print filename for each string.  */
126static bfd_boolean print_filenames;
127
128/* TRUE means for object files scan only the data section.  */
129static bfd_boolean datasection_only;
130
131/* TRUE if we found an initialized data section in the current file.  */
132static bfd_boolean got_a_section;
133
134/* The BFD object file format.  */
135static char *target;
136
137/* The character encoding format.  */
138static char encoding;
139static int encoding_bytes;
140
/* Long option table handed to getopt_long in main.  Every entry has a
   NULL flag pointer, so getopt_long returns the character in the last
   field, which is the value main's option switch dispatches on.  The
   all-zero entry terminates the table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
153
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  An instance is passed as
   the opaque argument to strings_a_section for every section of one
   object file.  */

typedef struct
{
  const char *  filename;	/* Name printed in front of strings.  */
  bfd_size_type filesize;	/* Cached file size; 0 means not yet known.  */
} filename_and_size_t;
162
163static void strings_a_section (bfd *, asection *, void *);
164static bfd_boolean strings_object_file (const char *);
165static bfd_boolean strings_file (char *file);
166static int integer_arg (char *s);
167static void print_strings (const char *, FILE *, file_off, int, int, char *);
168static void usage (FILE *, int);
169static long get_char (FILE *, file_off *, int *, char **);
170
171int main (int, char **);
172
173int
174main (int argc, char **argv)
175{
176  int optc;
177  int exit_status = 0;
178  bfd_boolean files_given = FALSE;
179
180#if defined (HAVE_SETLOCALE)
181  setlocale (LC_ALL, "");
182#endif
183  bindtextdomain (PACKAGE, LOCALEDIR);
184  textdomain (PACKAGE);
185
186  if (pledge ("stdio rpath", NULL) == -1)
187    fatal (_("Failed to pledge"));
188
189  program_name = argv[0];
190  xmalloc_set_program_name (program_name);
191
192  expandargv (&argc, &argv);
193
194  string_min = -1;
195  print_addresses = FALSE;
196  print_filenames = FALSE;
197  datasection_only = TRUE;
198  target = NULL;
199  encoding = 's';
200
201  while ((optc = getopt_long (argc, argv, "afhHn:ot:e:Vv0123456789",
202			      long_options, (int *) 0)) != EOF)
203    {
204      switch (optc)
205	{
206	case 'a':
207	  datasection_only = FALSE;
208	  break;
209
210	case 'f':
211	  print_filenames = TRUE;
212	  break;
213
214	case 'H':
215	case 'h':
216	  usage (stdout, 0);
217
218	case 'n':
219	  string_min = integer_arg (optarg);
220	  if (string_min < 1)
221	    fatal (_("invalid number %s"), optarg);
222	  break;
223
224	case 'o':
225	  print_addresses = TRUE;
226	  address_radix = 8;
227	  break;
228
229	case 't':
230	  print_addresses = TRUE;
231	  if (optarg[1] != '\0')
232	    usage (stderr, 1);
233	  switch (optarg[0])
234	    {
235	    case 'o':
236	      address_radix = 8;
237	      break;
238
239	    case 'd':
240	      address_radix = 10;
241	      break;
242
243	    case 'x':
244	      address_radix = 16;
245	      break;
246
247	    default:
248	      usage (stderr, 1);
249	    }
250	  break;
251
252	case 'T':
253	  target = optarg;
254	  break;
255
256	case 'e':
257	  if (optarg[1] != '\0')
258	    usage (stderr, 1);
259	  encoding = optarg[0];
260	  break;
261
262	case 'V':
263	case 'v':
264	  print_version ("strings");
265	  break;
266
267	case '?':
268	  usage (stderr, 1);
269
270	default:
271	  if (string_min < 0)
272	    string_min = optc - '0';
273	  else
274	    string_min = string_min * 10 + optc - '0';
275	  break;
276	}
277    }
278
279  if (string_min < 0)
280    string_min = 4;
281
282  switch (encoding)
283    {
284    case 'S':
285    case 's':
286      encoding_bytes = 1;
287      break;
288    case 'b':
289    case 'l':
290      encoding_bytes = 2;
291      break;
292    case 'B':
293    case 'L':
294      encoding_bytes = 4;
295      break;
296    default:
297      usage (stderr, 1);
298    }
299
300  bfd_init ();
301  set_default_bfd_target ();
302
303  if (optind >= argc)
304    {
305      datasection_only = FALSE;
306#ifdef SET_BINARY
307      SET_BINARY (fileno (stdin));
308#endif
309      print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
310      files_given = TRUE;
311    }
312  else
313    {
314      for (; optind < argc; ++optind)
315	{
316	  if (strcmp (argv[optind], "-") == 0)
317	    datasection_only = FALSE;
318	  else
319	    {
320	      files_given = TRUE;
321	      exit_status |= strings_file (argv[optind]) == FALSE;
322	    }
323	}
324    }
325
326  if (!files_given)
327    usage (stderr, 1);
328
329  return (exit_status);
330}
331
332/* Scan section SECT of the file ABFD, whose printable name is in
333   ARG->filename and whose size might be in ARG->filesize.  If it
334   contains initialized data set `got_a_section' and print the
335   strings in it.
336
337   FIXME: We ought to be able to return error codes/messages for
338   certain conditions.  */
339
340static void
341strings_a_section (bfd *abfd, asection *sect, void *arg)
342{
343  filename_and_size_t * filename_and_sizep;
344  bfd_size_type *filesizep;
345  bfd_size_type sectsize;
346  void *mem;
347
348  if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
349    return;
350
351  sectsize = bfd_get_section_size (sect);
352
353  if (sectsize <= 0)
354    return;
355
356  /* Get the size of the file.  This might have been cached for us.  */
357  filename_and_sizep = (filename_and_size_t *) arg;
358  filesizep = & filename_and_sizep->filesize;
359
360  if (*filesizep == 0)
361    {
362      struct stat st;
363
364      if (bfd_stat (abfd, &st))
365	return;
366
367      /* Cache the result so that we do not repeatedly stat this file.  */
368      *filesizep = st.st_size;
369    }
370
371  /* Compare the size of the section against the size of the file.
372     If the section is bigger then the file must be corrupt and
373     we should not try dumping it.  */
374  if (sectsize >= *filesizep)
375    return;
376
377  mem = xmalloc (sectsize);
378
379  if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
380    {
381      got_a_section = TRUE;
382
383      print_strings (filename_and_sizep->filename, NULL, sect->filepos,
384		     0, sectsize, mem);
385    }
386
387  free (mem);
388}
389
390/* Scan all of the sections in FILE, and print the strings
391   in the initialized data section(s).
392
393   Return TRUE if successful,
394   FALSE if not (such as if FILE is not an object file).  */
395
396static bfd_boolean
397strings_object_file (const char *file)
398{
399  filename_and_size_t filename_and_size;
400  bfd *abfd;
401
402  abfd = bfd_openr (file, target);
403
404  if (abfd == NULL)
405    /* Treat the file as a non-object file.  */
406    return FALSE;
407
408  /* This call is mainly for its side effect of reading in the sections.
409     We follow the traditional behavior of `strings' in that we don't
410     complain if we don't recognize a file to be an object file.  */
411  if (!bfd_check_format (abfd, bfd_object))
412    {
413      bfd_close (abfd);
414      return FALSE;
415    }
416
417  got_a_section = FALSE;
418  filename_and_size.filename = file;
419  filename_and_size.filesize = 0;
420  bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
421
422  if (!bfd_close (abfd))
423    {
424      bfd_nonfatal (file);
425      return FALSE;
426    }
427
428  return got_a_section;
429}
430
431/* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
432
433static bfd_boolean
434strings_file (char *file)
435{
436  statbuf st;
437
438  if (file_stat (file, &st) < 0)
439    {
440      if (errno == ENOENT)
441	non_fatal (_("'%s': No such file"), file);
442      else
443	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
444		   file, strerror (errno));
445      return FALSE;
446    }
447
448  /* If we weren't told to scan the whole file,
449     try to open it as an object file and only look at
450     initialized data sections.  If that fails, fall back to the
451     whole file.  */
452  if (!datasection_only || !strings_object_file (file))
453    {
454      FILE *stream;
455
456      stream = file_open (file, FOPEN_RB);
457      if (stream == NULL)
458	{
459	  fprintf (stderr, "%s: ", program_name);
460	  perror (file);
461	  return FALSE;
462	}
463
464      print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
465
466      if (fclose (stream) == EOF)
467	{
468	  fprintf (stderr, "%s: ", program_name);
469	  perror (file);
470	  return FALSE;
471	}
472    }
473
474  return TRUE;
475}
476
477/* Read the next character, return EOF if none available.
478   Assume that STREAM is positioned so that the next byte read
479   is at address ADDRESS in the file.
480
481   If STREAM is NULL, do not read from it.
482   The caller can supply a buffer of characters
483   to be processed before the data in STREAM.
484   MAGIC is the address of the buffer and
485   MAGICCOUNT is how many characters are in it.  */
486
487static long
488get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
489{
490  int c, i;
491  long r = EOF;
492  unsigned char buf[4];
493
494  for (i = 0; i < encoding_bytes; i++)
495    {
496      if (*magiccount)
497	{
498	  (*magiccount)--;
499	  c = *(*magic)++;
500	}
501      else
502	{
503	  if (stream == NULL)
504	    return EOF;
505
506	  /* Only use getc_unlocked if we found a declaration for it.
507	     Otherwise, libc is not thread safe by default, and we
508	     should not use it.  */
509
510#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
511	  c = getc_unlocked (stream);
512#else
513	  c = getc (stream);
514#endif
515	  if (c == EOF)
516	    return EOF;
517	}
518
519      (*address)++;
520      buf[i] = c;
521    }
522
523  switch (encoding)
524    {
525    case 'S':
526    case 's':
527      r = buf[0];
528      break;
529    case 'b':
530      r = (buf[0] << 8) | buf[1];
531      break;
532    case 'l':
533      r = buf[0] | (buf[1] << 8);
534      break;
535    case 'B':
536      r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
537	((long) buf[2] << 8) | buf[3];
538      break;
539    case 'L':
540      r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
541	((long) buf[3] << 24);
542      break;
543    }
544
545  if (r == EOF)
546    return 0;
547
548  return r;
549}
550
551/* Find the strings in file FILENAME, read from STREAM.
552   Assume that STREAM is positioned so that the next byte read
553   is at address ADDRESS in the file.
554   Stop reading at address STOP_POINT in the file, if nonzero.
555
556   If STREAM is NULL, do not read from it.
557   The caller can supply a buffer of characters
558   to be processed before the data in STREAM.
559   MAGIC is the address of the buffer and
560   MAGICCOUNT is how many characters are in it.
561   Those characters come at address ADDRESS and the data in STREAM follow.  */
562
563static void
564print_strings (const char *filename, FILE *stream, file_off address,
565	       int stop_point, int magiccount, char *magic)
566{
567  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
568
569  while (1)
570    {
571      file_off start;
572      int i;
573      long c;
574
575      /* See if the next `string_min' chars are all graphic chars.  */
576    tryline:
577      if (stop_point && address >= stop_point)
578	break;
579      start = address;
580      for (i = 0; i < string_min; i++)
581	{
582	  c = get_char (stream, &address, &magiccount, &magic);
583	  if (c == EOF)
584	    return;
585	  if (! STRING_ISGRAPHIC (c))
586	    /* Found a non-graphic.  Try again starting with next char.  */
587	    goto tryline;
588	  buf[i] = c;
589	}
590
591      /* We found a run of `string_min' graphic characters.  Print up
592	 to the next non-graphic character.  */
593
594      if (print_filenames)
595	printf ("%s: ", filename);
596      if (print_addresses)
597	switch (address_radix)
598	  {
599	  case 8:
600#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
601	    if (sizeof (start) > sizeof (long))
602	      printf ("%7Lo ", (unsigned long long) start);
603	    else
604#else
605# if !BFD_HOST_64BIT_LONG
606	    if (start != (unsigned long) start)
607	      printf ("++%7lo ", (unsigned long) start);
608	    else
609# endif
610#endif
611	      printf ("%7lo ", (unsigned long) start);
612	    break;
613
614	  case 10:
615#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
616	    if (sizeof (start) > sizeof (long))
617	      printf ("%7lld ", (unsigned long long) start);
618	    else
619#else
620# if !BFD_HOST_64BIT_LONG
621	    if (start != (unsigned long) start)
622	      printf ("++%7ld ", (unsigned long) start);
623	    else
624# endif
625#endif
626	      printf ("%7ld ", (long) start);
627	    break;
628
629	  case 16:
630#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
631	    if (sizeof (start) > sizeof (long))
632	      printf ("%7llx ", (unsigned long long) start);
633	    else
634#else
635# if !BFD_HOST_64BIT_LONG
636	    if (start != (unsigned long) start)
637	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
638		      (unsigned long) (start & 0xffffffff));
639	    else
640# endif
641#endif
642	      printf ("%7lx ", (unsigned long) start);
643	    break;
644	  }
645
646      buf[i] = '\0';
647      fputs (buf, stdout);
648
649      while (1)
650	{
651	  c = get_char (stream, &address, &magiccount, &magic);
652	  if (c == EOF)
653	    break;
654	  if (! STRING_ISGRAPHIC (c))
655	    break;
656	  putchar (c);
657	}
658
659      putchar ('\n');
660    }
661}
662
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal (leading "0") and hex (leading "0x" or
   "0X") numbers as in C.  In decimal and octal the number may be
   followed by a single `b' (multiply by 512) or `B' (multiply by
   1024); in hex those letters are digits.

   Returns the parsed value.  Calls fatal if S contains anything that
   is not part of the number (including a digit too large for the
   radix) or if the value does not fit in an int.  */

static int
integer_arg (char *s)
{
  int value = 0;
  int radix = 10;
  char *p = s;
  int c;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
        {
          radix = 16;
          p++;
        }
      else
        radix = 8;
    }

  for (;;)
    {
      int digit;

      c = *p++;
      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;
      else
        break;

      /* A digit that is out of range for the radix ends the number;
         unless it is a valid suffix it is rejected below.  */
      if (digit >= radix)
        break;

      /* Refuse to overflow int.  */
      if (value > (INT_MAX - digit) / radix)
        fatal (_("invalid integer argument %s"), s);

      value = value * radix + digit;
    }

  if (c == 'b' || c == 'B')
    {
      int scale = (c == 'b') ? 512 : 1024;

      if (value > INT_MAX / scale)
        fatal (_("invalid integer argument %s"), s);
      value *= scale;
    }
  else
    /* C was not consumed; step back so that it is checked below.  */
    p--;

  /* Anything left over means S was not a well-formed number.  */
  if (*p)
    fatal (_("invalid integer argument %s"), s);

  return value;
}
707
/* Print the usage message and the list of supported BFD targets to
   STREAM, then terminate the program with exit status STATUS.  The
   bug-report line is printed only when STATUS is 0.  This function
   does not return.  */

static void
usage (FILE *stream, int status)
{
  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
  /* The option summary is a single translatable string.  */
  fprintf (stream, _(" The options are:\n\
  -a - --all                Scan the entire file, not just the data section\n\
  -f --print-file-name      Print the name of the file before each string\n\
  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
  -<number>                 least [number] characters (default 4).\n\
  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
  -o                        An alias for --radix=o\n\
  -T --target=<BFDNAME>     Specify the binary file format\n\
  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
  @<file>                   Read options from <file>\n\
  -h --help                 Display this information\n\
  -v --version              Print the program's version number\n"));
  list_supported_targets (program_name, stream);
  /* Only a requested help message (status 0) gets the bug address.  */
  if (status == 0)
    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
  exit (status);
}
731