1/*-
2 * Copyright (c) 2007 S.Sam Arun Raj
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/types.h>
28#include <sys/capsicum.h>
29#include <sys/stat.h>
30
31#include <capsicum_helpers.h>
32#include <ctype.h>
33#include <err.h>
34#include <errno.h>
35#include <fcntl.h>
36#include <getopt.h>
37#include <inttypes.h>
38#include <stdint.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <sysexits.h>
43#include <unistd.h>
44
45#include <libelf.h>
46#include <libelftc.h>
47#include <gelf.h>
48
49#include <libcasper.h>
50#include <casper/cap_fileargs.h>
51
52#include "_elftc.h"
53
54ELFTC_VCSID("$Id: strings.c 3648 2018-11-22 23:26:43Z emaste $");
55
/*
 * Numeric base used when a string's offset is printed.  RADIX_DECIMAL is
 * the zero value, so it is what a zero-initialized `radix' holds.
 */
enum radix_style {
	RADIX_DECIMAL = 0,	/* selected by -t d */
	RADIX_HEX = 1,		/* selected by -t x */
	RADIX_OCTAL = 2		/* selected by -t o and by -o */
};
61
/*
 * Character encodings selectable with -e.  ENCODING_7BIT is the zero
 * value, so it is what a zero-initialized `encoding' holds.
 */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s */
	ENCODING_8BIT = 1,		/* -e S */
	ENCODING_16BIT_BIG = 2,		/* -e b */
	ENCODING_16BIT_LITTLE = 3,	/* -e l */
	ENCODING_32BIT_BIG = 4,		/* -e B */
	ENCODING_32BIT_LITTLE = 5	/* -e L */
};
70
/*
 * Non-zero when the character value `c' may be part of a reported string:
 * it must lie in 0..255 and be a tab, satisfy isprint(), or be above 127
 * while the 8-bit encoding is selected.
 *
 * The argument is expanded several times, so it must have no side effects.
 * The macro reads the file-scope `encoding' at the point of expansion.
 */
#define PRINTABLE(c)							\
	(!((c) < 0 || (c) > 255) &&					\
	    (isprint((c)) || (c) == '\t' ||				\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
75
76static int encoding_size, entire_file, show_filename, show_loc;
77static enum encoding_style encoding;
78static enum radix_style radix;
79static intmax_t min_len;
80
/*
 * Long options understood by getopt_long(); each entry reports the short
 * option character stored in `.val'.  The all-zero entry ends the table.
 */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
91
92int	getcharacter(FILE *, long *);
93int	handle_file(fileargs_t *fa, const char *);
94int	handle_elf(const char *, FILE *);
95int	handle_binary(const char *, FILE *, size_t);
96int	find_strings(const char *, FILE *, off_t, off_t);
97void	show_version(void);
98void	usage(void);
99
100/*
101 * strings(1) extracts text(contiguous printable characters)
102 * from elf and binary files.
103 */
104int
105main(int argc, char **argv)
106{
107	fileargs_t *fa;
108	cap_rights_t rights;
109	int ch, rc;
110
111	rc = 0;
112	min_len = 0;
113	encoding_size = 1;
114	if (elf_version(EV_CURRENT) == EV_NONE)
115		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
116		    elf_errmsg(-1));
117
118	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
119	    strings_longopts, NULL)) != -1) {
120		switch ((char)ch) {
121		case 'a':
122			entire_file = 1;
123			break;
124		case 'e':
125			if (*optarg == 's') {
126				encoding = ENCODING_7BIT;
127			} else if (*optarg == 'S') {
128				encoding = ENCODING_8BIT;
129			} else if (*optarg == 'b') {
130				encoding = ENCODING_16BIT_BIG;
131				encoding_size = 2;
132			} else if (*optarg == 'B') {
133				encoding = ENCODING_32BIT_BIG;
134				encoding_size = 4;
135			} else if (*optarg == 'l') {
136				encoding = ENCODING_16BIT_LITTLE;
137				encoding_size = 2;
138			} else if (*optarg == 'L') {
139				encoding = ENCODING_32BIT_LITTLE;
140				encoding_size = 4;
141			} else
142				usage();
143			        /* NOTREACHED */
144			break;
145		case 'f':
146			show_filename = 1;
147			break;
148		case 'n':
149			min_len = strtoimax(optarg, (char**)NULL, 10);
150			if (min_len <= 0)
151				errx(EX_USAGE, "option -n should specify a "
152				    "positive decimal integer.");
153			break;
154		case 'o':
155			show_loc = 1;
156			radix = RADIX_OCTAL;
157			break;
158		case 't':
159			show_loc = 1;
160			if (*optarg == 'd')
161				radix = RADIX_DECIMAL;
162			else if (*optarg == 'o')
163				radix = RADIX_OCTAL;
164			else if (*optarg == 'x')
165				radix = RADIX_HEX;
166			else
167				usage();
168			        /* NOTREACHED */
169			break;
170		case 'v':
171		case 'V':
172			show_version();
173			/* NOTREACHED */
174		case '0':
175	        case '1':
176		case '2':
177		case '3':
178		case '4':
179		case '5':
180		case '6':
181		case '7':
182		case '8':
183		case '9':
184			min_len *= 10;
185			min_len += ch - '0';
186			break;
187		case 'h':
188		case '?':
189		default:
190			usage();
191			/* NOTREACHED */
192		}
193	}
194	argc -= optind;
195	argv += optind;
196
197	cap_rights_init(&rights, CAP_READ, CAP_SEEK, CAP_FSTAT, CAP_FCNTL, CAP_MMAP_R);
198	fa = fileargs_init(argc, argv, O_RDONLY, 0, &rights, FA_OPEN);
199	if (fa == NULL)
200		err(1, "Unable to initialize casper fileargs");
201
202	caph_cache_catpages();
203	if (caph_limit_stdio() < 0 || caph_enter_casper() < 0) {
204		fileargs_free(fa);
205		err(1, "Unable to enter capability mode");
206	}
207
208	if (min_len == 0)
209		min_len = 4;
210	if (*argv == NULL)
211		rc = find_strings("{standard input}", stdin, 0, 0);
212	else while (*argv != NULL) {
213		if (handle_file(fa, *argv) != 0)
214			rc = 1;
215		argv++;
216	}
217
218	fileargs_free(fa);
219
220	return (rc);
221}
222
223int
224handle_file(fileargs_t *fa, const char *name)
225{
226	FILE *pfile;
227	int rt;
228
229	if (name == NULL)
230		return (1);
231	pfile = fileargs_fopen(fa, name, "rb");
232	if (pfile == NULL) {
233		warnx("'%s': %s", name, strerror(errno));
234		return (1);
235	}
236
237	rt = handle_elf(name, pfile);
238	fclose(pfile);
239	return (rt);
240}
241
/*
 * Files that handle_elf() does not scan section by section are handed to
 * this routine and treated as flat binary data.  That includes text
 * files, core dumps and so on.
 *
 * The stream is rewound and `size' bytes starting at offset 0 are
 * searched.  Returns whatever find_strings() returns.
 */
int
handle_binary(const char *name, FILE *pfile, size_t size)
{
	int status;

	/* The result of the rewind is deliberately not checked. */
	(void)fseeko(pfile, 0, SEEK_SET);
	status = find_strings(name, pfile, 0, size);
	return (status);
}
253
254/*
255 * Will analyse a file to see if it ELF, other files including ar(1),
256 * core dumps are passed off and treated as flat binary files. Unlike
257 * GNU size in FreeBSD this routine will not treat ELF object from
258 * different archs as flat binary files(has to overridden using -a).
259 */
260int
261handle_elf(const char *name, FILE *pfile)
262{
263	struct stat buf;
264	GElf_Ehdr elfhdr;
265	GElf_Shdr shdr;
266	Elf *elf;
267	Elf_Scn *scn;
268	int rc, fd;
269
270	rc = 0;
271	fd = fileno(pfile);
272	if (fstat(fd, &buf) < 0)
273		return (1);
274
275	/* If entire file is chosen, treat it as a binary file */
276	if (entire_file)
277		return (handle_binary(name, pfile, buf.st_size));
278
279	(void)lseek(fd, 0, SEEK_SET);
280	elf = elf_begin(fd, ELF_C_READ, NULL);
281	if (elf_kind(elf) != ELF_K_ELF) {
282		(void)elf_end(elf);
283		return (handle_binary(name, pfile, buf.st_size));
284	}
285
286	if (gelf_getehdr(elf, &elfhdr) == NULL) {
287		(void)elf_end(elf);
288		warnx("%s: ELF file could not be processed", name);
289		return (1);
290	}
291
292	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
293		(void)elf_end(elf);
294		return (handle_binary(name, pfile, buf.st_size));
295	} else {
296		scn = NULL;
297		while ((scn = elf_nextscn(elf, scn)) != NULL) {
298			if (gelf_getshdr(scn, &shdr) == NULL)
299				continue;
300			if (shdr.sh_type != SHT_NOBITS &&
301			    (shdr.sh_flags & SHF_ALLOC) != 0) {
302				rc = find_strings(name, pfile, shdr.sh_offset,
303				    shdr.sh_size);
304			}
305		}
306	}
307	(void)elf_end(elf);
308	return (rc);
309}
310
311/*
312 * Retrieves a character from input stream based on the encoding
313 * type requested.
314 */
315int
316getcharacter(FILE *pfile, long *rt)
317{
318	int i, c;
319	char buf[4];
320
321	for(i = 0; i < encoding_size; i++) {
322		c = getc(pfile);
323		if (c == EOF)
324			return (-1);
325		buf[i] = c;
326	}
327
328	switch (encoding) {
329	case ENCODING_7BIT:
330	case ENCODING_8BIT:
331		*rt = buf[0];
332		break;
333	case ENCODING_16BIT_BIG:
334		*rt = (buf[0] << 8) | buf[1];
335		break;
336	case ENCODING_16BIT_LITTLE:
337		*rt = buf[0] | (buf[1] << 8);
338		break;
339	case ENCODING_32BIT_BIG:
340		*rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
341		    ((long) buf[2] << 8) | buf[3];
342		break;
343	case ENCODING_32BIT_LITTLE:
344		*rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
345		    ((long) buf[3] << 24);
346		break;
347	default:
348		return (-1);
349	}
350
351	return (0);
352}
353
354/*
355 * Input stream is read until the end of file is reached or until
356 * the section size is reached in case of ELF files. Contiguous
357 * characters of >= min_size(default 4) will be displayed.
358 */
359int
360find_strings(const char *name, FILE *pfile, off_t offset, off_t size)
361{
362	off_t cur_off, start_off;
363	char *obuf;
364	long c;
365	int i;
366
367	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
368		fprintf(stderr, "Unable to allocate memory: %s\n",
369		    strerror(errno));
370		return (1);
371	}
372
373	(void)fseeko(pfile, offset, SEEK_SET);
374	cur_off = offset;
375	start_off = 0;
376	for (;;) {
377		if ((offset + size) && (cur_off >= offset + size))
378			break;
379		start_off = cur_off;
380		memset(obuf, 0, min_len + 1);
381		for(i = 0; i < min_len; i++) {
382			if (getcharacter(pfile, &c) < 0)
383				goto _exit1;
384			if (PRINTABLE(c)) {
385				obuf[i] = c;
386				obuf[i + 1] = 0;
387				cur_off += encoding_size;
388			} else {
389				if (encoding == ENCODING_8BIT &&
390				    (uint8_t)c > 127) {
391					obuf[i] = c;
392					obuf[i + 1] = 0;
393					cur_off += encoding_size;
394					continue;
395				}
396				cur_off += encoding_size;
397				break;
398			}
399		}
400
401		if (i >= min_len && ((cur_off <= offset + size) ||
402		    !(offset + size))) {
403			if (show_filename)
404				printf("%s: ", name);
405			if (show_loc) {
406				switch (radix) {
407				case RADIX_DECIMAL:
408					printf("%7ju ", (uintmax_t)start_off);
409					break;
410				case RADIX_HEX:
411					printf("%7jx ", (uintmax_t)start_off);
412					break;
413				case RADIX_OCTAL:
414					printf("%7jo ", (uintmax_t)start_off);
415					break;
416				}
417			}
418			printf("%s", obuf);
419
420			for (;;) {
421				if ((offset + size) &&
422				    (cur_off >= offset + size))
423					break;
424				if (getcharacter(pfile, &c) < 0)
425					break;
426				cur_off += encoding_size;
427				if (encoding == ENCODING_8BIT &&
428				    (uint8_t)c > 127) {
429					putchar(c);
430					continue;
431				}
432				if (!PRINTABLE(c))
433					break;
434				putchar(c);
435			}
436			putchar('\n');
437		}
438	}
439_exit1:
440	free(obuf);
441	return (0);
442}
443
/*
 * printf-style help text; the single %s receives the program name.
 * Written as adjacent string literals, one per output line.
 */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"

/*
 * Writes the help text to standard error and terminates the program with
 * EXIT_FAILURE.  Does not return.
 */
void
usage(void)
{
	const char *progname;

	progname = ELFTC_GETPROGNAME();
	(void)fprintf(stderr, USAGE_MESSAGE, progname);
	exit(EXIT_FAILURE);
}
464
/*
 * Prints "<program name> (<elftc version>)" to standard output and
 * terminates the program with EXIT_SUCCESS.  Does not return.
 */
void
show_version(void)
{
	const char *progname, *version;

	progname = ELFTC_GETPROGNAME();
	version = elftc_version();
	printf("%s (%s)\n", progname, version);
	exit(EXIT_SUCCESS);
}
472