1/*-
2 * Copyright (c) 2007 S.Sam Arun Raj
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/types.h>
28#include <sys/stat.h>
29
30#include <ctype.h>
31#include <err.h>
32#include <errno.h>
33#include <fcntl.h>
34#include <getopt.h>
35#include <inttypes.h>
36#include <stdint.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <sysexits.h>
41#include <unistd.h>
42
43#include <libelf.h>
44#include <libelftc.h>
45#include <gelf.h>
46
47#include "_elftc.h"
48
49ELFTC_VCSID("$Id: strings.c 3648 2018-11-22 23:26:43Z emaste $");
50
/*
 * Number base used when printing the offset of each string.
 * Selected in main() by the -t option, or by -o for octal.
 */
enum radix_style {
	RADIX_DECIMAL,		/* -t d */
	RADIX_HEX,		/* -t x */
	RADIX_OCTAL		/* -t o, or -o */
};
56
/*
 * Character encoding assumed for the input, selected in main() by the
 * first letter of the -e option argument.
 */
enum encoding_style {
	ENCODING_7BIT,		/* -e s: one byte per character */
	ENCODING_8BIT,		/* -e S: one byte, bytes above 127 accepted */
	ENCODING_16BIT_BIG,	/* -e b: two bytes, big-endian */
	ENCODING_16BIT_LITTLE,	/* -e l: two bytes, little-endian */
	ENCODING_32BIT_BIG,	/* -e B: four bytes, big-endian */
	ENCODING_32BIT_LITTLE	/* -e L: four bytes, little-endian */
};
65
/*
 * True when 'c' counts as a string character: it must lie in the
 * range 0..255 and be either a tab, a character accepted by
 * isprint(), or (for the 8-bit encoding only) any value above 127.
 * The argument is evaluated more than once and the macro reads the
 * file-scope 'encoding' variable.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    ((c) == '\t' || isprint((c)) ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
70
/*
 * Option state, filled in by main() while parsing the command line.
 *
 * encoding_size  number of input bytes per character (1, 2 or 4)
 * entire_file    set by -a: scan the whole file as flat binary
 * show_filename  set by -f: prefix each string with the file name
 * show_loc       set by -o / -t: prefix each string with its offset
 */
static int encoding_size, entire_file, show_filename, show_loc;
/* Character encoding chosen with -e. */
static enum encoding_style encoding;
/* Offset radix chosen with -t or -o; only used when show_loc is set. */
static enum radix_style radix;
/* Minimum string length (-n N or -N); main() substitutes 4 when it is 0. */
static intmax_t min_len;
75
/*
 * Long option table handed to getopt_long().  Every entry maps to the
 * short option character that main() handles in its switch.
 */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	  .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	  .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	  .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	  .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	  .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	  .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	  .flag = NULL, .val = 'v' },
	/* Terminating all-zero entry. */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
86
/* Forward declarations for the functions defined later in this file. */
int	getcharacter(FILE *, long *);	/* read one character in the selected encoding */
int	handle_file(const char *);	/* open a named file and scan it */
int	handle_elf(const char *, FILE *);	/* scan ELF sections, or fall back to flat scan */
int	handle_binary(const char *, FILE *, size_t);	/* scan a file as flat binary */
int	find_strings(const char *, FILE *, off_t, off_t);	/* print strings in a byte range */
void	show_version(void);	/* print version and exit */
void	usage(void);		/* print usage and exit */
94
95/*
96 * strings(1) extracts text(contiguous printable characters)
97 * from elf and binary files.
98 */
99int
100main(int argc, char **argv)
101{
102	int ch, rc;
103
104	rc = 0;
105	min_len = 0;
106	encoding_size = 1;
107	if (elf_version(EV_CURRENT) == EV_NONE)
108		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
109		    elf_errmsg(-1));
110
111	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
112	    strings_longopts, NULL)) != -1) {
113		switch ((char)ch) {
114		case 'a':
115			entire_file = 1;
116			break;
117		case 'e':
118			if (*optarg == 's') {
119				encoding = ENCODING_7BIT;
120			} else if (*optarg == 'S') {
121				encoding = ENCODING_8BIT;
122			} else if (*optarg == 'b') {
123				encoding = ENCODING_16BIT_BIG;
124				encoding_size = 2;
125			} else if (*optarg == 'B') {
126				encoding = ENCODING_32BIT_BIG;
127				encoding_size = 4;
128			} else if (*optarg == 'l') {
129				encoding = ENCODING_16BIT_LITTLE;
130				encoding_size = 2;
131			} else if (*optarg == 'L') {
132				encoding = ENCODING_32BIT_LITTLE;
133				encoding_size = 4;
134			} else
135				usage();
136			        /* NOTREACHED */
137			break;
138		case 'f':
139			show_filename = 1;
140			break;
141		case 'n':
142			min_len = strtoimax(optarg, (char**)NULL, 10);
143			if (min_len <= 0)
144				errx(EX_USAGE, "option -n should specify a "
145				    "positive decimal integer.");
146			break;
147		case 'o':
148			show_loc = 1;
149			radix = RADIX_OCTAL;
150			break;
151		case 't':
152			show_loc = 1;
153			if (*optarg == 'd')
154				radix = RADIX_DECIMAL;
155			else if (*optarg == 'o')
156				radix = RADIX_OCTAL;
157			else if (*optarg == 'x')
158				radix = RADIX_HEX;
159			else
160				usage();
161			        /* NOTREACHED */
162			break;
163		case 'v':
164		case 'V':
165			show_version();
166			/* NOTREACHED */
167		case '0':
168	        case '1':
169		case '2':
170		case '3':
171		case '4':
172		case '5':
173		case '6':
174		case '7':
175		case '8':
176		case '9':
177			min_len *= 10;
178			min_len += ch - '0';
179			break;
180		case 'h':
181		case '?':
182		default:
183			usage();
184			/* NOTREACHED */
185		}
186	}
187	argc -= optind;
188	argv += optind;
189
190	if (min_len == 0)
191		min_len = 4;
192	if (*argv == NULL)
193		rc = find_strings("{standard input}", stdin, 0, 0);
194	else while (*argv != NULL) {
195		if (handle_file(*argv) != 0)
196			rc = 1;
197		argv++;
198	}
199	return (rc);
200}
201
/*
 * Opens the named file for reading and hands it to handle_elf().
 * Returns 1 when 'name' is NULL or the file cannot be opened (a
 * warning is printed in the latter case); otherwise returns whatever
 * handle_elf() reports.
 */
int
handle_file(const char *name)
{
	FILE *stream;
	int status;

	status = 1;
	if (name != NULL) {
		stream = fopen(name, "rb");
		if (stream != NULL) {
			status = handle_elf(name, stream);
			fclose(stream);
		} else
			warnx("'%s': %s", name, strerror(errno));
	}
	return (status);
}
220
/*
 * Files that handle_elf() does not process section by section are
 * passed here and are treated as flat binary files.  This includes
 * text files, core dumps ...
 *
 * The stream is repositioned to its start and scanned from offset 0
 * with 'size' as the length bound.  The result of find_strings() is
 * returned unchanged.
 */
int
handle_binary(const char *name, FILE *pfile, size_t size)
{
	const off_t start = 0;

	(void)fseeko(pfile, start, SEEK_SET);
	return (find_strings(name, pfile, start, size));
}
232
233/*
234 * Will analyse a file to see if it ELF, other files including ar(1),
235 * core dumps are passed off and treated as flat binary files. Unlike
236 * GNU size in FreeBSD this routine will not treat ELF object from
237 * different archs as flat binary files(has to overridden using -a).
238 */
239int
240handle_elf(const char *name, FILE *pfile)
241{
242	struct stat buf;
243	GElf_Ehdr elfhdr;
244	GElf_Shdr shdr;
245	Elf *elf;
246	Elf_Scn *scn;
247	int rc, fd;
248
249	rc = 0;
250	fd = fileno(pfile);
251	if (fstat(fd, &buf) < 0)
252		return (1);
253
254	/* If entire file is chosen, treat it as a binary file */
255	if (entire_file)
256		return (handle_binary(name, pfile, buf.st_size));
257
258	(void)lseek(fd, 0, SEEK_SET);
259	elf = elf_begin(fd, ELF_C_READ, NULL);
260	if (elf_kind(elf) != ELF_K_ELF) {
261		(void)elf_end(elf);
262		return (handle_binary(name, pfile, buf.st_size));
263	}
264
265	if (gelf_getehdr(elf, &elfhdr) == NULL) {
266		(void)elf_end(elf);
267		warnx("%s: ELF file could not be processed", name);
268		return (1);
269	}
270
271	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
272		(void)elf_end(elf);
273		return (handle_binary(name, pfile, buf.st_size));
274	} else {
275		scn = NULL;
276		while ((scn = elf_nextscn(elf, scn)) != NULL) {
277			if (gelf_getshdr(scn, &shdr) == NULL)
278				continue;
279			if (shdr.sh_type != SHT_NOBITS &&
280			    (shdr.sh_flags & SHF_ALLOC) != 0) {
281				rc = find_strings(name, pfile, shdr.sh_offset,
282				    shdr.sh_size);
283			}
284		}
285	}
286	(void)elf_end(elf);
287	return (rc);
288}
289
290/*
291 * Retrieves a character from input stream based on the encoding
292 * type requested.
293 */
294int
295getcharacter(FILE *pfile, long *rt)
296{
297	int i, c;
298	char buf[4];
299
300	for(i = 0; i < encoding_size; i++) {
301		c = getc(pfile);
302		if (c == EOF)
303			return (-1);
304		buf[i] = c;
305	}
306
307	switch (encoding) {
308	case ENCODING_7BIT:
309	case ENCODING_8BIT:
310		*rt = buf[0];
311		break;
312	case ENCODING_16BIT_BIG:
313		*rt = (buf[0] << 8) | buf[1];
314		break;
315	case ENCODING_16BIT_LITTLE:
316		*rt = buf[0] | (buf[1] << 8);
317		break;
318	case ENCODING_32BIT_BIG:
319		*rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
320		    ((long) buf[2] << 8) | buf[3];
321		break;
322	case ENCODING_32BIT_LITTLE:
323		*rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
324		    ((long) buf[3] << 24);
325		break;
326	default:
327		return (-1);
328	}
329
330	return (0);
331}
332
333/*
334 * Input stream is read until the end of file is reached or until
335 * the section size is reached in case of ELF files. Contiguous
336 * characters of >= min_size(default 4) will be displayed.
337 */
338int
339find_strings(const char *name, FILE *pfile, off_t offset, off_t size)
340{
341	off_t cur_off, start_off;
342	char *obuf;
343	long c;
344	int i;
345
346	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
347		fprintf(stderr, "Unable to allocate memory: %s\n",
348		    strerror(errno));
349		return (1);
350	}
351
352	(void)fseeko(pfile, offset, SEEK_SET);
353	cur_off = offset;
354	start_off = 0;
355	for (;;) {
356		if ((offset + size) && (cur_off >= offset + size))
357			break;
358		start_off = cur_off;
359		memset(obuf, 0, min_len + 1);
360		for(i = 0; i < min_len; i++) {
361			if (getcharacter(pfile, &c) < 0)
362				goto _exit1;
363			if (PRINTABLE(c)) {
364				obuf[i] = c;
365				obuf[i + 1] = 0;
366				cur_off += encoding_size;
367			} else {
368				if (encoding == ENCODING_8BIT &&
369				    (uint8_t)c > 127) {
370					obuf[i] = c;
371					obuf[i + 1] = 0;
372					cur_off += encoding_size;
373					continue;
374				}
375				cur_off += encoding_size;
376				break;
377			}
378		}
379
380		if (i >= min_len && ((cur_off <= offset + size) ||
381		    !(offset + size))) {
382			if (show_filename)
383				printf("%s: ", name);
384			if (show_loc) {
385				switch (radix) {
386				case RADIX_DECIMAL:
387					printf("%7ju ", (uintmax_t)start_off);
388					break;
389				case RADIX_HEX:
390					printf("%7jx ", (uintmax_t)start_off);
391					break;
392				case RADIX_OCTAL:
393					printf("%7jo ", (uintmax_t)start_off);
394					break;
395				}
396			}
397			printf("%s", obuf);
398
399			for (;;) {
400				if ((offset + size) &&
401				    (cur_off >= offset + size))
402					break;
403				if (getcharacter(pfile, &c) < 0)
404					break;
405				cur_off += encoding_size;
406				if (encoding == ENCODING_8BIT &&
407				    (uint8_t)c > 127) {
408					putchar(c);
409					continue;
410				}
411				if (!PRINTABLE(c))
412					break;
413				putchar(c);
414			}
415			putchar('\n');
416		}
417	}
418_exit1:
419	free(obuf);
420	return (0);
421}
422
/*
 * Help text printed by usage().  It is used as a printf-style format
 * whose single %s receives the program name.
 */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"
435
436void
437usage(void)
438{
439
440	fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
441	exit(EXIT_FAILURE);
442}
443
/*
 * Prints the program name followed by the elftc_version() string in
 * parentheses to standard output and terminates the program with
 * EXIT_SUCCESS.
 */
void
show_version(void)
{
	const char *progname, *version;

	progname = ELFTC_GETPROGNAME();
	version = elftc_version();
	(void)printf("%s (%s)\n", progname, version);
	exit(EXIT_SUCCESS);
}
451