strings.c revision 89865
1/* strings -- print the strings of printable characters in files
2   Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18   02111-1307, USA.  */
19
20/* $FreeBSD: head/contrib/binutils/binutils/strings.c 89865 2002-01-27 12:12:53Z obrien $ */
21
22/* Usage: strings [options] file...
23
24   Options:
25   --all
26   -a
27   -		Do not scan only the initialized data section of object files.
28
29   --print-file-name
30   -f		Print the name of the file before each string.
31
32   --bytes=min-len
33   -n min-len
34   -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35		that are followed by a NUL or a newline.  Default is 4.
36
37   --radix={o,x,d}
38   -t {o,x,d}	Print the offset within the file before each string,
39		in octal/hex/decimal.
40
41   -o		Like -to.  (Some other implementations have -o like -to,
42		others like -td.  We chose one arbitrarily.)
43
44   --encoding={s,b,l,B,L}
45   -e {s,b,l,B,L}
46		Select character encoding: single-byte, bigendian 16-bit,
47		littleendian 16-bit, bigendian 32-bit, littleendian 32-bit
48
49   --target=BFDNAME
50		Specify a non-default object file format.
51
52   --help
53   -h		Print the usage message on the standard output.
54
55   --version
56   -v		Print the program version number.
57
58   Written by Richard Stallman <rms@gnu.ai.mit.edu>
59   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
60
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#include "bfd.h"
65#include <stdio.h>
66#include <getopt.h>
67#include <errno.h>
68#include "bucomm.h"
69#include "libiberty.h"
70#include "safe-ctype.h"
71
72/* Some platforms need to put stdin into binary mode, to read
73    binary files.  */
74#ifdef HAVE_SETMODE
75#ifndef O_BINARY
76#ifdef _O_BINARY
77#define O_BINARY _O_BINARY
78#define setmode _setmode
79#else
80#define O_BINARY 0
81#endif
82#endif
83#if O_BINARY
84#include <io.h>
85#define SET_BINARY(f) do { if (!isatty(f)) setmode(f,O_BINARY); } while (0)
86#endif
87#endif
88
89#define isgraphic(c) (ISPRINT (c) || (c) == '\t')
90
91#ifndef errno
92extern int errno;
93#endif
94
95/* The BFD section flags that identify an initialized data section.  */
96#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
97
98#ifdef HAVE_FOPEN64
99typedef off64_t file_off;
100#define file_open(s,m) fopen64(s,m)
101#else
102typedef off_t file_off;
103#define file_open(s,m) fopen(s,m)
104#endif
105
106/* Radix for printing addresses (must be 8, 10 or 16).  */
107static int address_radix;
108
109/* Minimum length of sequence of graphic chars to trigger output.  */
110static int string_min;
111
112/* true means print address within file for each string.  */
113static boolean print_addresses;
114
115/* true means print filename for each string.  */
116static boolean print_filenames;
117
118/* true means for object files scan only the data section.  */
119static boolean datasection_only;
120
121/* true if we found an initialized data section in the current file.  */
122static boolean got_a_section;
123
124/* The BFD object file format.  */
125static char *target;
126
127/* The character encoding format.  */
128static char encoding;
129static int encoding_bytes;
130
/* Long option table handed to getopt_long.  Every entry is reported
   to main as the character in its last column; the table ends with
   an all-zero entry.  */
static struct option long_options[] =
{
  {"all",             no_argument,       NULL, 'a'},
  {"print-file-name", no_argument,       NULL, 'f'},
  {"bytes",           required_argument, NULL, 'n'},
  {"radix",           required_argument, NULL, 't'},
  {"encoding",        required_argument, NULL, 'e'},
  {"target",          required_argument, NULL, 'T'},
  {"help",            no_argument,       NULL, 'h'},
  {"version",         no_argument,       NULL, 'v'},
  {NULL,              0,                 NULL, 0}
};
143
144static void strings_a_section PARAMS ((bfd *, asection *, PTR));
145static boolean strings_object_file PARAMS ((const char *));
146static boolean strings_file PARAMS ((char *file));
147static int integer_arg PARAMS ((char *s));
148static void print_strings PARAMS ((const char *filename, FILE *stream,
149				  file_off address, int stop_point,
150				  int magiccount, char *magic));
151static void usage PARAMS ((FILE *stream, int status));
152static long get_char PARAMS ((FILE *stream, file_off *address,
153			      int *magiccount, char **magic));
154
155int main PARAMS ((int, char **));
156
157int
158main (argc, argv)
159     int argc;
160     char **argv;
161{
162  int optc;
163  int exit_status = 0;
164  boolean files_given = false;
165
166#if defined (HAVE_SETLOCALE)
167  setlocale (LC_ALL, "");
168#endif
169  bindtextdomain (PACKAGE, LOCALEDIR);
170  textdomain (PACKAGE);
171
172  program_name = argv[0];
173  xmalloc_set_program_name (program_name);
174  string_min = -1;
175  print_addresses = false;
176  print_filenames = false;
177  datasection_only = true;
178  target = NULL;
179  encoding = 's';
180
181  while ((optc = getopt_long (argc, argv, "afhHn:ot:e:Vv0123456789",
182			      long_options, (int *) 0)) != EOF)
183    {
184      switch (optc)
185	{
186	case 'a':
187	  datasection_only = false;
188	  break;
189
190	case 'f':
191	  print_filenames = true;
192	  break;
193
194	case 'H':
195	case 'h':
196	  usage (stdout, 0);
197
198	case 'n':
199	  string_min = integer_arg (optarg);
200	  if (string_min < 1)
201	    {
202	      fatal (_("invalid number %s"), optarg);
203	    }
204	  break;
205
206	case 'o':
207	  print_addresses = true;
208	  address_radix = 8;
209	  break;
210
211	case 't':
212	  print_addresses = true;
213	  if (optarg[1] != '\0')
214	    usage (stderr, 1);
215	  switch (optarg[0])
216	    {
217	    case 'o':
218	      address_radix = 8;
219	      break;
220
221	    case 'd':
222	      address_radix = 10;
223	      break;
224
225	    case 'x':
226	      address_radix = 16;
227	      break;
228
229	    default:
230	      usage (stderr, 1);
231	    }
232	  break;
233
234	case 'T':
235	  target = optarg;
236	  break;
237
238	case 'e':
239	  if (optarg[1] != '\0')
240	    usage (stderr, 1);
241	  encoding = optarg[0];
242	  break;
243
244	case 'V':
245	case 'v':
246	  print_version ("strings");
247	  break;
248
249	case '?':
250	  usage (stderr, 1);
251
252	default:
253	  if (string_min < 0)
254	    string_min = optc - '0';
255	  else
256	    string_min = string_min * 10 + optc - '0';
257	  break;
258	}
259    }
260
261  if (string_min < 0)
262    string_min = 4;
263
264  switch (encoding)
265    {
266    case 's':
267      encoding_bytes = 1;
268      break;
269    case 'b':
270    case 'l':
271      encoding_bytes = 2;
272      break;
273    case 'B':
274    case 'L':
275      encoding_bytes = 4;
276      break;
277    default:
278      usage (stderr, 1);
279    }
280
281  bfd_init ();
282  set_default_bfd_target ();
283
284  if (optind >= argc)
285    {
286      datasection_only = false;
287#ifdef SET_BINARY
288      SET_BINARY (fileno (stdin));
289#endif
290      print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
291      files_given = true;
292    }
293  else
294    {
295      for (; optind < argc; ++optind)
296	{
297	  if (strcmp (argv[optind], "-") == 0)
298	    datasection_only = false;
299	  else
300	    {
301	      files_given = true;
302	      exit_status |= (strings_file (argv[optind]) == false);
303	    }
304	}
305    }
306
307  if (files_given == false)
308    usage (stderr, 1);
309
310  return (exit_status);
311}
312
313/* Scan section SECT of the file ABFD, whose printable name is FILE.
314   If it contains initialized data,
315   set `got_a_section' and print the strings in it.  */
316
317static void
318strings_a_section (abfd, sect, filearg)
319     bfd *abfd;
320     asection *sect;
321     PTR filearg;
322{
323  const char *file = (const char *) filearg;
324
325  if ((sect->flags & DATA_FLAGS) == DATA_FLAGS)
326    {
327      bfd_size_type sz = bfd_get_section_size_before_reloc (sect);
328      PTR mem = xmalloc (sz);
329      if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sz))
330	{
331	  got_a_section = true;
332	  print_strings (file, (FILE *) NULL, sect->filepos, 0, sz, mem);
333	}
334      free (mem);
335    }
336}
337
338/* Scan all of the sections in FILE, and print the strings
339   in the initialized data section(s).
340
341   Return true if successful,
342   false if not (such as if FILE is not an object file).  */
343
344static boolean
345strings_object_file (file)
346     const char *file;
347{
348  bfd *abfd = bfd_openr (file, target);
349
350  if (abfd == NULL)
351    {
352      /* Treat the file as a non-object file.  */
353      return false;
354    }
355
356  /* This call is mainly for its side effect of reading in the sections.
357     We follow the traditional behavior of `strings' in that we don't
358     complain if we don't recognize a file to be an object file.  */
359  if (bfd_check_format (abfd, bfd_object) == false)
360    {
361      bfd_close (abfd);
362      return false;
363    }
364
365  got_a_section = false;
366  bfd_map_over_sections (abfd, strings_a_section, (PTR) file);
367
368  if (!bfd_close (abfd))
369    {
370      bfd_nonfatal (file);
371      return false;
372    }
373
374  return got_a_section;
375}
376
377/* Print the strings in FILE.  Return true if ok, false if an error occurs.  */
378
379static boolean
380strings_file (file)
381     char *file;
382{
383  /* If we weren't told to scan the whole file,
384     try to open it as an object file and only look at
385     initialized data sections.  If that fails, fall back to the
386     whole file.  */
387  if (!datasection_only || !strings_object_file (file))
388    {
389      FILE *stream;
390
391      stream = file_open (file, FOPEN_RB);
392      if (stream == NULL)
393	{
394	  fprintf (stderr, "%s: ", program_name);
395	  perror (file);
396	  return false;
397	}
398
399      print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
400
401      if (fclose (stream) == EOF)
402	{
403	  fprintf (stderr, "%s: ", program_name);
404	  perror (file);
405	  return false;
406	}
407    }
408
409  return true;
410}
411
412/* Read the next character, return EOF if none available.
413   Assume that STREAM is positioned so that the next byte read
414   is at address ADDRESS in the file.
415
416   If STREAM is NULL, do not read from it.
417   The caller can supply a buffer of characters
418   to be processed before the data in STREAM.
419   MAGIC is the address of the buffer and
420   MAGICCOUNT is how many characters are in it.  */
421
422static long
423get_char (stream, address, magiccount, magic)
424     FILE *stream;
425     file_off *address;
426     int *magiccount;
427     char **magic;
428{
429  int c, i;
430  long r = EOF;
431  unsigned char buf[4];
432
433  for (i = 0; i < encoding_bytes; i++)
434    {
435      if (*magiccount)
436	{
437	  (*magiccount)--;
438	  c = *(*magic)++;
439	}
440      else
441	{
442	  if (stream == NULL)
443	    return EOF;
444#ifdef HAVE_GETC_UNLOCKED
445	  c = getc_unlocked (stream);
446#else
447	  c = getc (stream);
448#endif
449	  if (c == EOF)
450	    return EOF;
451	}
452
453      (*address)++;
454      buf[i] = c;
455    }
456
457  switch (encoding)
458    {
459    case 's':
460      r = buf[0];
461      break;
462    case 'b':
463      r = (buf[0] << 8) | buf[1];
464      break;
465    case 'l':
466      r = buf[0] | (buf[1] << 8);
467      break;
468    case 'B':
469      r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
470	((long) buf[2] << 8) | buf[3];
471      break;
472    case 'L':
473      r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
474	((long) buf[3] << 24);
475      break;
476    }
477
478  if (r == EOF)
479    return 0;
480
481  return r;
482}
483
484/* Find the strings in file FILENAME, read from STREAM.
485   Assume that STREAM is positioned so that the next byte read
486   is at address ADDRESS in the file.
487   Stop reading at address STOP_POINT in the file, if nonzero.
488
489   If STREAM is NULL, do not read from it.
490   The caller can supply a buffer of characters
491   to be processed before the data in STREAM.
492   MAGIC is the address of the buffer and
493   MAGICCOUNT is how many characters are in it.
494   Those characters come at address ADDRESS and the data in STREAM follow.  */
495
496static void
497print_strings (filename, stream, address, stop_point, magiccount, magic)
498     const char *filename;
499     FILE *stream;
500     file_off address;
501     int stop_point;
502     int magiccount;
503     char *magic;
504{
505  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
506
507  while (1)
508    {
509      file_off start;
510      int i;
511      long c;
512
513      /* See if the next `string_min' chars are all graphic chars.  */
514    tryline:
515      if (stop_point && address >= stop_point)
516	break;
517      start = address;
518      for (i = 0; i < string_min; i++)
519	{
520	  c = get_char (stream, &address, &magiccount, &magic);
521	  if (c == EOF)
522	    return;
523	  if (c > 255 || c < 0 || !isgraphic (c))
524	    /* Found a non-graphic.  Try again starting with next char.  */
525	    goto tryline;
526	  buf[i] = c;
527	}
528
529      /* We found a run of `string_min' graphic characters.  Print up
530         to the next non-graphic character.  */
531
532      if (print_filenames)
533	printf ("%s: ", filename);
534      if (print_addresses)
535	switch (address_radix)
536	  {
537	  case 8:
538#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
539	    if (sizeof (start) > sizeof (long))
540	      printf ("%7Lo ", (unsigned long long) start);
541	    else
542#else
543# if !BFD_HOST_64BIT_LONG
544	    if (start != (unsigned long) start)
545	      printf ("++%7lo ", (unsigned long) start);
546	    else
547# endif
548#endif
549	      printf ("%7lo ", (unsigned long) start);
550	    break;
551
552	  case 10:
553#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
554	    if (sizeof (start) > sizeof (long))
555	      printf ("%7Ld ", (unsigned long long) start);
556	    else
557#else
558# if !BFD_HOST_64BIT_LONG
559	    if (start != (unsigned long) start)
560	      printf ("++%7ld ", (unsigned long) start);
561	    else
562# endif
563#endif
564	      printf ("%7ld ", (long) start);
565	    break;
566
567	  case 16:
568#if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
569	    if (sizeof (start) > sizeof (long))
570	      printf ("%7Lx ", (unsigned long long) start);
571	    else
572#else
573# if !BFD_HOST_64BIT_LONG
574	    if (start != (unsigned long) start)
575	      printf ("%lx%8.8lx ", start >> 32, start & 0xffffffff);
576	    else
577# endif
578#endif
579	      printf ("%7lx ", (unsigned long) start);
580	    break;
581	  }
582
583      buf[i] = '\0';
584      fputs (buf, stdout);
585
586      while (1)
587	{
588	  c = get_char (stream, &address, &magiccount, &magic);
589	  if (c == EOF)
590	    break;
591	  if (c > 255 || c < 0 || !isgraphic (c))
592	    break;
593	  putchar (c);
594	}
595
596      putchar ('\n');
597    }
598}
599
/* Parse string S as a non-negative integer and return its value.

   The radix is decimal by default; a leading "0x" or "0X" selects
   hexadecimal and any other leading "0" selects octal, as in C.
   Every digit must be valid in the selected radix.  A decimal or
   octal number may be followed by `b', which multiplies the value by
   512, or by `B', which multiplies it by 1024; in hexadecimal those
   letters are digits.  A string with no digits yields 0.

   Trailing garbage, or a value that does not fit in an int, is
   reported with fatal ().  NOTE(review): fatal () is assumed not to
   return, as its use in the rest of this file implies -- confirm.  */

static int
integer_arg (s)
     char *s;
{
  /* Largest int, computed here so that no extra header is needed.  */
  const int int_max = (int) (~0U >> 1);
  int value = 0;
  int radix = 10;
  int scale = 1;
  char *p = s;

  if (p[0] == '0')
    {
      if (p[1] == 'x' || p[1] == 'X')
	{
	  radix = 16;
	  p += 2;
	}
      else
	{
	  radix = 8;
	  p++;
	}
    }

  for (;; p++)
    {
      int c = (unsigned char) *p;
      int digit;

      if (c >= '0' && c <= '9')
	digit = c - '0';
      else if (c >= 'a' && c <= 'f')
	digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
	digit = c - 'A' + 10;
      else
	break;

      /* Not a digit of this radix: stop, and let the suffix and
	 trailing-garbage checks below decide what it is.  */
      if (digit >= radix)
	break;

      /* Refuse to overflow; test before multiplying.  */
      if (value > (int_max - digit) / radix)
	fatal (_("invalid integer argument %s"), s);

      value = value * radix + digit;
    }

  if (*p == 'b')
    {
      scale = 512;
      p++;
    }
  else if (*p == 'B')
    {
      scale = 1024;
      p++;
    }

  if (*p != '\0' || value > int_max / scale)
    {
      fatal (_("invalid integer argument %s"), s);
    }
  return value * scale;
}
646
647static void
648usage (stream, status)
649     FILE *stream;
650     int status;
651{
652  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
653  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
654  fprintf (stream, _(" The options are:\n\
655  -a - --all                Scan the entire file, not just the data section\n\
656  -f --print-file-name      Print the name of the file before each string\n\
657  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
658  -<number>                 least [number] characters (default 4).\n\
659  -t --radix={o,x,d}        Print the location of the string in base 8, 10 or 16\n\
660  -o                        An alias for --radix=o\n\
661  -T --target=<BFDNAME>     Specify the binary file format\n\
662  -e --encoding={s,b,l,B,L} Select character size and endianness:\n\
663                            s = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
664  -h --help                 Display this information\n\
665  -v --version              Print the program's version number\n"));
666  list_supported_targets (program_name, stream);
667  if (status == 0)
668    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
669  exit (status);
670}
671