1260684Skaiw/*-
2260684Skaiw * Copyright (c) 2007 S.Sam Arun Raj
3260684Skaiw * All rights reserved.
4260684Skaiw *
5260684Skaiw * Redistribution and use in source and binary forms, with or without
6260684Skaiw * modification, are permitted provided that the following conditions
7260684Skaiw * are met:
8260684Skaiw * 1. Redistributions of source code must retain the above copyright
9260684Skaiw *    notice, this list of conditions and the following disclaimer.
10260684Skaiw * 2. Redistributions in binary form must reproduce the above copyright
11260684Skaiw *    notice, this list of conditions and the following disclaimer in the
12260684Skaiw *    documentation and/or other materials provided with the distribution.
13260684Skaiw *
14260684Skaiw * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15260684Skaiw * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16260684Skaiw * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17260684Skaiw * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18260684Skaiw * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19260684Skaiw * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20260684Skaiw * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21260684Skaiw * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22260684Skaiw * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23260684Skaiw * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24260684Skaiw * SUCH DAMAGE.
25260684Skaiw */
26260684Skaiw
27260684Skaiw#include <sys/stat.h>
28260684Skaiw#include <sys/types.h>
29260684Skaiw
30260684Skaiw#include <ctype.h>
31260684Skaiw#include <err.h>
32260684Skaiw#include <errno.h>
33260684Skaiw#include <fcntl.h>
34260684Skaiw#include <getopt.h>
35260684Skaiw#include <inttypes.h>
36260684Skaiw#include <stdint.h>
37260684Skaiw#include <stdio.h>
38260684Skaiw#include <stdlib.h>
39260684Skaiw#include <string.h>
40295577Semaste#include <sysexits.h>
41260684Skaiw#include <unistd.h>
42260684Skaiw
43260684Skaiw#include <libelf.h>
44260684Skaiw#include <libelftc.h>
45260684Skaiw#include <gelf.h>
46260684Skaiw
47260684Skaiw#include "_elftc.h"
48260684Skaiw
49300311SemasteELFTC_VCSID("$Id: strings.c 3446 2016-05-03 01:31:17Z emaste $");
50260684Skaiw
/* Status codes returned by the file handlers and used as the exit status. */
enum return_code {
	RETURN_OK = 0,		/* scan completed */
	RETURN_NOINPUT = 1,	/* input could not be opened or was missing */
	RETURN_SOFTWARE = 2	/* internal failure (allocation, fstat, ELF) */
};
56260684Skaiw
/* Number base used when printing string offsets (-o / -t). */
enum radix_style {
	RADIX_DECIMAL = 0,
	RADIX_HEX = 1,
	RADIX_OCTAL = 2
};
62260684Skaiw
/* Character encodings selectable with -e. */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s */
	ENCODING_8BIT = 1,		/* -e S */
	ENCODING_16BIT_BIG = 2,		/* -e b */
	ENCODING_16BIT_LITTLE = 3,	/* -e l */
	ENCODING_32BIT_BIG = 4,		/* -e B */
	ENCODING_32BIT_LITTLE = 5	/* -e L */
};
71260684Skaiw
/*
 * True when 'c' lies in 0..255 and is a tab, a character accepted by
 * isprint(), or -- only when the 8-bit encoding is selected -- any value
 * above 127.  The argument is evaluated several times, so it must not have
 * side effects.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
76260684Skaiw
77295577Semastestatic int encoding_size, entire_file, show_filename, show_loc;
78275369Semastestatic enum encoding_style encoding;
79275369Semastestatic enum radix_style radix;
80295577Semastestatic intmax_t min_len;
81260684Skaiw
/* Long options; each one maps onto the short option named by its 'val'. */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	/* Terminating entry. */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
92260684Skaiw
/* Read one character from stdin in the selected encoding. */
long	getcharacter(void);
/* Open a named file (or use standard input) and scan it. */
int	handle_file(const char *name);
/* Scan the relevant sections of an ELF object open on 'fd'. */
int	handle_elf(const char *name, int fd);
/* Scan the whole of the file open on 'fd' as flat binary. */
int	handle_binary(const char *name, int fd);
/* Print the strings found in 'size' bytes of stdin starting at 'offset'. */
int	find_strings(const char *name, off_t offset, off_t size);
/* Print the version identifier and exit. */
void	show_version(void);
/* Print the usage message and exit. */
void	usage(void);
100260684Skaiw
101260684Skaiw/*
102260684Skaiw * strings(1) extracts text(contiguous printable characters)
103260684Skaiw * from elf and binary files.
104260684Skaiw */
105260684Skaiwint
106260684Skaiwmain(int argc, char **argv)
107260684Skaiw{
108260684Skaiw	int ch, rc;
109260684Skaiw
110260684Skaiw	rc = RETURN_OK;
111260684Skaiw	min_len = 0;
112260684Skaiw	encoding_size = 1;
113260684Skaiw	if (elf_version(EV_CURRENT) == EV_NONE)
114260684Skaiw		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
115260684Skaiw		    elf_errmsg(-1));
116260684Skaiw
117260684Skaiw	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
118260684Skaiw	    strings_longopts, NULL)) != -1)
119260684Skaiw		switch((char)ch) {
120260684Skaiw		case 'a':
121260684Skaiw			entire_file = 1;
122260684Skaiw			break;
123260684Skaiw		case 'e':
124260684Skaiw			if (*optarg == 's') {
125260684Skaiw				encoding = ENCODING_7BIT;
126260684Skaiw			} else if (*optarg == 'S') {
127260684Skaiw				encoding = ENCODING_8BIT;
128260684Skaiw			} else if (*optarg == 'b') {
129260684Skaiw				encoding = ENCODING_16BIT_BIG;
130260684Skaiw				encoding_size = 2;
131260684Skaiw			} else if (*optarg == 'B') {
132260684Skaiw				encoding = ENCODING_32BIT_BIG;
133260684Skaiw				encoding_size = 4;
134260684Skaiw			} else if (*optarg == 'l') {
135260684Skaiw				encoding = ENCODING_16BIT_LITTLE;
136260684Skaiw				encoding_size = 2;
137260684Skaiw			} else if (*optarg == 'L') {
138260684Skaiw				encoding = ENCODING_32BIT_LITTLE;
139260684Skaiw				encoding_size = 4;
140260684Skaiw			} else
141260684Skaiw				usage();
142260684Skaiw			        /* NOTREACHED */
143260684Skaiw			break;
144260684Skaiw		case 'f':
145260684Skaiw			show_filename = 1;
146260684Skaiw			break;
147260684Skaiw		case 'n':
148295577Semaste			min_len = strtoimax(optarg, (char**)NULL, 10);
149295577Semaste			if (min_len <= 0)
150295577Semaste				errx(EX_USAGE, "option -n should specify a "
151295577Semaste				    "positive decimal integer.");
152260684Skaiw			break;
153260684Skaiw		case 'o':
154260684Skaiw			show_loc = 1;
155260684Skaiw			radix = RADIX_OCTAL;
156260684Skaiw			break;
157260684Skaiw		case 't':
158260684Skaiw			show_loc = 1;
159260684Skaiw			if (*optarg == 'd')
160260684Skaiw				radix = RADIX_DECIMAL;
161260684Skaiw			else if (*optarg == 'o')
162260684Skaiw				radix = RADIX_OCTAL;
163260684Skaiw			else if (*optarg == 'x')
164260684Skaiw				radix = RADIX_HEX;
165260684Skaiw			else
166260684Skaiw				usage();
167260684Skaiw			        /* NOTREACHED */
168260684Skaiw			break;
169260684Skaiw		case 'v':
170260684Skaiw		case 'V':
171260684Skaiw			show_version();
172260684Skaiw			/* NOTREACHED */
173260684Skaiw		case '0':
174260684Skaiw	        case '1':
175260684Skaiw		case '2':
176260684Skaiw		case '3':
177260684Skaiw		case '4':
178260684Skaiw		case '5':
179260684Skaiw		case '6':
180260684Skaiw		case '7':
181260684Skaiw		case '8':
182260684Skaiw		case '9':
183260684Skaiw			min_len *= 10;
184260684Skaiw			min_len += ch - '0';
185260684Skaiw			break;
186260684Skaiw		case 'h':
187260684Skaiw		case '?':
188260684Skaiw		default:
189260684Skaiw			usage();
190260684Skaiw			/* NOTREACHED */
191260684Skaiw		}
192260684Skaiw	argc -= optind;
193260684Skaiw	argv += optind;
194260684Skaiw
195260684Skaiw	if (!min_len)
196260684Skaiw		min_len = 4;
197260684Skaiw	if (!*argv)
198260684Skaiw		rc = handle_file("{standard input}");
199260684Skaiw	else while (*argv) {
200260684Skaiw		rc = handle_file(*argv);
201260684Skaiw		argv++;
202260684Skaiw	}
203260684Skaiw	return (rc);
204260684Skaiw}
205260684Skaiw
206260684Skaiwint
207260684Skaiwhandle_file(const char *name)
208260684Skaiw{
209260684Skaiw	int fd, rt;
210260684Skaiw
211260684Skaiw	if (name == NULL)
212260684Skaiw		return (RETURN_NOINPUT);
213260684Skaiw	if (strcmp("{standard input}", name) != 0) {
214260684Skaiw		if (freopen(name, "rb", stdin) == NULL) {
215260684Skaiw			warnx("'%s': %s", name, strerror(errno));
216260684Skaiw			return (RETURN_NOINPUT);
217260684Skaiw		}
218260684Skaiw	} else {
219260684Skaiw		return (find_strings(name, (off_t)0, (off_t)0));
220260684Skaiw	}
221260684Skaiw
222260684Skaiw	fd = fileno(stdin);
223260684Skaiw	if (fd < 0)
224260684Skaiw		return (RETURN_NOINPUT);
225260684Skaiw	rt = handle_elf(name, fd);
226260684Skaiw	return (rt);
227260684Skaiw}
228260684Skaiw
229260684Skaiw/*
230260684Skaiw * Files not understood by handle_elf, will be passed off here and will
231260684Skaiw * treated as a binary file. This would include text file, core dumps ...
232260684Skaiw */
233260684Skaiwint
234260684Skaiwhandle_binary(const char *name, int fd)
235260684Skaiw{
236260684Skaiw	struct stat buf;
237260684Skaiw
238260684Skaiw	memset(&buf, 0, sizeof(struct stat));
239260684Skaiw	(void) lseek(fd, (off_t)0, SEEK_SET);
240260684Skaiw	if (!fstat(fd, &buf))
241260684Skaiw		return (find_strings(name, (off_t)0, buf.st_size));
242260684Skaiw	return (RETURN_SOFTWARE);
243260684Skaiw}
244260684Skaiw
245260684Skaiw/*
246260684Skaiw * Will analyse a file to see if it ELF, other files including ar(1),
247260684Skaiw * core dumps are passed off and treated as flat binary files. Unlike
248260684Skaiw * GNU size in FreeBSD this routine will not treat ELF object from
249260684Skaiw * different archs as flat binary files(has to overridden using -a).
250260684Skaiw */
251260684Skaiwint
252260684Skaiwhandle_elf(const char *name, int fd)
253260684Skaiw{
254260684Skaiw	GElf_Ehdr elfhdr;
255260684Skaiw	GElf_Shdr shdr;
256260684Skaiw	Elf *elf;
257260684Skaiw	Elf_Scn *scn;
258260684Skaiw	int rc;
259260684Skaiw
260260684Skaiw	rc = RETURN_OK;
261300311Semaste	/* If entire file is chosen, treat it as a binary file */
262260684Skaiw	if (entire_file)
263260684Skaiw		return (handle_binary(name, fd));
264260684Skaiw
265260684Skaiw	(void) lseek(fd, (off_t)0, SEEK_SET);
266260684Skaiw	elf = elf_begin(fd, ELF_C_READ, NULL);
267260684Skaiw	if (elf_kind(elf) != ELF_K_ELF) {
268260684Skaiw		(void) elf_end(elf);
269260684Skaiw		return (handle_binary(name, fd));
270260684Skaiw	}
271260684Skaiw
272260684Skaiw	if (gelf_getehdr(elf, &elfhdr) == NULL) {
273260684Skaiw		(void) elf_end(elf);
274260684Skaiw		warnx("%s: ELF file could not be processed", name);
275260684Skaiw		return (RETURN_SOFTWARE);
276260684Skaiw	}
277260684Skaiw
278260684Skaiw	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
279260684Skaiw		(void) elf_end(elf);
280260684Skaiw		return (handle_binary(name, fd));
281260684Skaiw	} else {
282260684Skaiw		scn = NULL;
283260684Skaiw		while ((scn = elf_nextscn(elf, scn)) != NULL) {
284260684Skaiw			if (gelf_getshdr(scn, &shdr) == NULL)
285260684Skaiw				continue;
286260684Skaiw			if (shdr.sh_type != SHT_NOBITS &&
287260684Skaiw			    (shdr.sh_flags & SHF_ALLOC) != 0) {
288260684Skaiw				rc = find_strings(name, shdr.sh_offset,
289260684Skaiw				    shdr.sh_size);
290260684Skaiw			}
291260684Skaiw		}
292260684Skaiw	}
293260684Skaiw	(void) elf_end(elf);
294260684Skaiw	return (rc);
295260684Skaiw}
296260684Skaiw
297260684Skaiw/*
298260684Skaiw * Retrieves a character from input stream based on the encoding
299260684Skaiw * type requested.
300260684Skaiw */
301260684Skaiwlong
302260684Skaiwgetcharacter(void)
303260684Skaiw{
304260684Skaiw	long rt;
305260684Skaiw	int i;
306260684Skaiw	char buf[4], c;
307260684Skaiw
308260684Skaiw	rt = EOF;
309260684Skaiw	for(i = 0; i < encoding_size; i++) {
310260684Skaiw		c = getc(stdin);
311260684Skaiw		if (feof(stdin))
312260684Skaiw			return (EOF);
313260684Skaiw		buf[i] = c;
314260684Skaiw	}
315260684Skaiw
316260684Skaiw	switch(encoding) {
317260684Skaiw	case ENCODING_7BIT:
318260684Skaiw	case ENCODING_8BIT:
319260684Skaiw		rt = buf[0];
320260684Skaiw		break;
321260684Skaiw	case ENCODING_16BIT_BIG:
322260684Skaiw		rt = (buf[0] << 8) | buf[1];
323260684Skaiw		break;
324260684Skaiw	case ENCODING_16BIT_LITTLE:
325260684Skaiw		 rt = buf[0] | (buf[1] << 8);
326260684Skaiw		 break;
327260684Skaiw	case ENCODING_32BIT_BIG:
328260684Skaiw		rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
329260684Skaiw           	    ((long) buf[2] << 8) | buf[3];
330260684Skaiw           	break;
331260684Skaiw	case ENCODING_32BIT_LITTLE:
332260684Skaiw		rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
333260684Skaiw        	    ((long) buf[3] << 24);
334260684Skaiw           	break;
335260684Skaiw	}
336260684Skaiw	return (rt);
337260684Skaiw}
338260684Skaiw
339260684Skaiw/*
340260684Skaiw * Input stream stdin is read until the end of file is reached or until
341260684Skaiw * the section size is reached in case of ELF files. Contiguous
342260684Skaiw * characters of >= min_size(default 4) will be displayed.
343260684Skaiw */
344260684Skaiwint
345260684Skaiwfind_strings(const char *name, off_t offset, off_t size)
346260684Skaiw{
347260684Skaiw	off_t cur_off, start_off;
348260684Skaiw	char *obuf;
349260684Skaiw	long c;
350260684Skaiw	int i;
351260684Skaiw
352260684Skaiw	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
353260684Skaiw		(void) fprintf(stderr, "Unable to allocate memory: %s\n",
354260684Skaiw		     strerror(errno));
355260684Skaiw		return (RETURN_SOFTWARE);
356260684Skaiw	}
357260684Skaiw
358260684Skaiw	(void) fseeko(stdin, offset, SEEK_SET);
359260684Skaiw	cur_off = offset;
360260684Skaiw	start_off = 0;
361260684Skaiw	while(1) {
362260684Skaiw		if ((offset + size) && (cur_off >= offset + size))
363260684Skaiw			break;
364260684Skaiw		start_off = cur_off;
365260684Skaiw		memset(obuf, 0, min_len+1);
366260684Skaiw		for(i = 0; i < min_len; i++) {
367260684Skaiw			c = getcharacter();
368260684Skaiw			if (c == EOF && feof(stdin))
369260684Skaiw				goto _exit1;
370260684Skaiw		 	if (PRINTABLE(c)) {
371260684Skaiw		 		obuf[i] = c;
372260684Skaiw		 		obuf[i+1] = 0;
373260684Skaiw		 		cur_off += encoding_size;
374260684Skaiw		 	} else {
375260684Skaiw				if (encoding == ENCODING_8BIT &&
376260684Skaiw				    (uint8_t)c > 127) {
377260684Skaiw			 		obuf[i] = c;
378260684Skaiw			 		obuf[i+1] = 0;
379260684Skaiw			 		cur_off += encoding_size;
380260684Skaiw			 		continue;
381260684Skaiw			 	}
382260684Skaiw	 			cur_off += encoding_size;
383260684Skaiw	 			break;
384260684Skaiw		 	}
385260684Skaiw		}
386260684Skaiw
387260684Skaiw		if (i >= min_len && ((cur_off <= offset + size) ||
388260684Skaiw		    !(offset + size))) {
389260684Skaiw			if (show_filename)
390260684Skaiw				printf ("%s: ", name);
391260684Skaiw			if (show_loc) {
392260684Skaiw				switch(radix) {
393260684Skaiw				case RADIX_DECIMAL:
394260684Skaiw					(void) printf("%7ju ",
395260684Skaiw					    (uintmax_t)start_off);
396260684Skaiw					break;
397260684Skaiw				case RADIX_HEX:
398260684Skaiw					(void) printf("%7jx ",
399260684Skaiw					    (uintmax_t)start_off);
400260684Skaiw					break;
401260684Skaiw				case RADIX_OCTAL:
402260684Skaiw					(void) printf("%7jo ",
403260684Skaiw					    (uintmax_t)start_off);
404260684Skaiw					break;
405260684Skaiw				}
406260684Skaiw			}
407260684Skaiw			printf("%s", obuf);
408260684Skaiw
409260684Skaiw			while(1) {
410260684Skaiw				if ((offset + size) &&
411260684Skaiw				    (cur_off >= offset + size))
412260684Skaiw					break;
413260684Skaiw				c = getcharacter();
414260684Skaiw				cur_off += encoding_size;
415260684Skaiw				if (encoding == ENCODING_8BIT &&
416260684Skaiw				    (uint8_t)c > 127) {
417260684Skaiw			 		putchar(c);
418260684Skaiw			 		continue;
419260684Skaiw			 	}
420260684Skaiw				if (!PRINTABLE(c) || c == EOF)
421260684Skaiw					break;
422260684Skaiw				putchar(c);
423260684Skaiw			}
424260684Skaiw			putchar('\n');
425260684Skaiw		}
426260684Skaiw	}
427260684Skaiw_exit1:
428260684Skaiw	free(obuf);
429260684Skaiw	return (RETURN_OK);
430260684Skaiw}
431260684Skaiw
/* printf-style usage text; the single %s receives the program name. */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"
444260684Skaiw
445260684Skaiwvoid
446260684Skaiwusage(void)
447260684Skaiw{
448260684Skaiw	(void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
449260684Skaiw	exit(EXIT_FAILURE);
450260684Skaiw}
451260684Skaiw
/*
 * Writes "<program name> (<toolchain version>)" to stdout and exits with
 * EXIT_SUCCESS.
 */
void
show_version(void)
{
	const char *progname = ELFTC_GETPROGNAME();

	(void) printf("%s (%s)\n", progname, elftc_version());
	exit(EXIT_SUCCESS);
}
458