1/* vi: set sw=4 ts=4: */
2/*
3 * Mini tr implementation for busybox
4 *
5 ** Copyright (c) 1987,1997, Prentice Hall   All rights reserved.
6 *
7 * The name of Prentice Hall may not be used to endorse or promote
8 * products derived from this software without specific prior
9 * written permission.
10 *
11 * Copyright (c) Michiel Huisjes
12 *
13 * This version of tr is adapted from Minix tr and was modified
14 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
15 *
16 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17 */
18/* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19 * TODO: xdigit, graph, print
20 */
21#include "libbb.h"
22
/* Highest byte value handled (octal 0377 == 255); the lookup tables in
 * tr_main are sized ASCII+1 so that every byte value has a slot. */
#define ASCII 0377

/* Bit flags stored in tr_main's 'flags', one per command-line option letter */
#define TR_OPT_complement	(1<<0)	/* -c */
#define TR_OPT_delete		(1<<1)	/* -d */
#define TR_OPT_squeeze_reps	(1<<2)	/* -s */
28
/*
 * Fill in the translation table: the k-th byte of string1 is mapped to the
 * k-th byte of string2.  Once string2 is used up, the remaining bytes of
 * string1 all map to the last byte taken from string2 (or to '0' if
 * string2 is empty).
 *
 * pvector     - translation table indexed by byte value; only the entries
 *               named by string1 are modified
 * string1     - bytes to translate from, string1_len of them
 * string2     - bytes to translate to, string2_len of them
 */
static void map(char *pvector,
		unsigned char *string1, unsigned int string1_len,
		unsigned char *string2, unsigned int string2_len)
{
	unsigned int from_pos;
	unsigned int to_pos = 0;
	char target = '0';

	for (from_pos = 0; from_pos < string1_len; from_pos++) {
		/* Advance through string2 while it lasts, then keep reusing
		 * whatever was picked up last. */
		if (to_pos < string2_len)
			target = string2[to_pos++];
		pvector[string1[from_pos]] = target;
	}
}
43
44/* supported constructs:
45 *   Ranges,  e.g.,  [0-9]  ==>  0123456789
46 *   Escapes, e.g.,  \a     ==>  Control-G
47 *	 Character classes, e.g. [:upper:] ==> A ... Z
48 */
49static unsigned int expand(const char *arg, char *buffer)
50{
51	char *buffer_start = buffer;
52	unsigned i;
53	unsigned char ac;
54#define CLO ":]\0"
55	static const char classes[] ALIGN1 =
56		"alpha"CLO "alnum"CLO "digit"CLO "lower"CLO "upper"CLO "space"CLO
57		"blank"CLO "punct"CLO "cntrl"CLO;
58#define CLASS_invalid 0 /* we increment the retval */
59#define CLASS_alpha 1
60#define CLASS_alnum 2
61#define CLASS_digit 3
62#define CLASS_lower 4
63#define CLASS_upper 5
64#define CLASS_space 6
65#define CLASS_blank 7
66#define CLASS_punct 8
67#define CLASS_cntrl 9
68//#define CLASS_xdigit 10
69//#define CLASS_graph 11
70//#define CLASS_print 12
71	while (*arg) {
72		if (*arg == '\\') {
73			arg++;
74			*buffer++ = bb_process_escape_sequence(&arg);
75		} else if (*(arg+1) == '-') {
76			ac = *(arg+2);
77			if (ac == 0) {
78				*buffer++ = *arg++;
79				continue;
80			}
81			i = *arg;
82			while (i <= ac)
83				*buffer++ = i++;
84			arg += 3; /* Skip the assumed a-z */
85		} else if (*arg == '[') {
86			arg++;
87			i = *arg++;
88			if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
89				smalluint j;
90				{ /* not really pretty.. */
91					char *tmp = xstrndup(arg, 7); // warning: xdigit needs 8, not 7
92					j = index_in_strings(classes, tmp) + 1;
93					free(tmp);
94				}
95				if (j == CLASS_alnum || j == CLASS_digit) {
96					for (i = '0'; i <= '9'; i++)
97						*buffer++ = i;
98				}
99				if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
100					for (i = 'A'; i <= 'Z'; i++)
101						*buffer++ = i;
102				}
103				if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
104					for (i = 'a'; i <= 'z'; i++)
105						*buffer++ = i;
106				}
107				if (j == CLASS_space || j == CLASS_blank) {
108					*buffer++ = '\t';
109					if (j == CLASS_space) {
110						*buffer++ = '\n';
111						*buffer++ = '\v';
112						*buffer++ = '\f';
113						*buffer++ = '\r';
114					}
115					*buffer++ = ' ';
116				}
117				if (j == CLASS_punct || j == CLASS_cntrl) {
118					for (i = 0; i <= ASCII; i++)
119						if ((j == CLASS_punct &&
120							 isprint(i) && (!isalnum(i)) && (!isspace(i))) ||
121							(j == CLASS_cntrl && iscntrl(i)))
122							*buffer++ = i;
123				}
124				if (j == CLASS_invalid) {
125					*buffer++ = '[';
126					*buffer++ = ':';
127					continue;
128				}
129				break;
130			}
131			if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
132				*buffer++ = *arg;
133				arg += 3;	/* Skip the closing =] */
134				continue;
135			}
136			if (*arg++ != '-') {
137				*buffer++ = '[';
138				arg -= 2;
139				continue;
140			}
141			ac = *arg++;
142			while (i <= ac)
143				*buffer++ = i++;
144			arg++;	/* Skip the assumed ']' */
145		} else
146			*buffer++ = *arg++;
147	}
148	return (buffer - buffer_start);
149}
150
/*
 * Replace the byte set held in buffer by its complement: every byte value
 * 0..255 that does NOT occur in buffer[0..buffer_len-1], in ascending order.
 *
 * buffer     - in: the set to complement; out: the complemented set.
 *              Must have room for 256 bytes (the complement of an empty set).
 * buffer_len - number of valid bytes in buffer on entry
 *
 * Returns the number of bytes now stored in buffer.
 */
static int complement(char *buffer, int buffer_len)
{
	/* One slot per byte value (same extent as ASCII + 1) */
	enum { BYTE_VALUES = 256 };
	unsigned char present[BYTE_VALUES] = {0};
	int i, ix;

	/* Index through unsigned char: where plain char is signed, bytes
	 * >= 0x80 are negative and would never compare equal to 128..255,
	 * so they would wrongly end up in the complement. */
	for (i = 0; i < buffer_len; i++)
		present[(unsigned char)buffer[i]] = 1;

	/* The input has been fully recorded, so buffer can be overwritten
	 * in place. */
	ix = 0;
	for (i = 0; i < BYTE_VALUES; i++)
		if (!present[i])
			buffer[ix++] = i;
	return ix;
}
167
168int tr_main(int argc, char **argv);
169int tr_main(int argc, char **argv)
170{
171	unsigned char *ptr;
172	int output_length = 0, input_length;
173	int idx = 1;
174	int i;
175	smalluint flags = 0;
176	size_t read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
177	RESERVE_CONFIG_UBUFFER(output, BUFSIZ);
178	RESERVE_CONFIG_BUFFER(vector, ASCII+1);
179	RESERVE_CONFIG_BUFFER(invec,  ASCII+1);
180	RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
181
182	if (argc > 1 && argv[idx][0] == '-') {
183		for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
184			if (*ptr == 'c')
185				flags |= TR_OPT_complement;
186			else if (*ptr == 'd')
187				flags |= TR_OPT_delete;
188			else if (*ptr == 's')
189				flags |= TR_OPT_squeeze_reps;
190			else
191				bb_show_usage();
192		}
193		idx++;
194	}
195	for (i = 0; i <= ASCII; i++) {
196		vector[i] = i;
197		invec[i] = outvec[i] = FALSE;
198	}
199
200#define tr_buf bb_common_bufsiz1
201	if (argv[idx] != NULL) {
202		input_length = expand(argv[idx++], tr_buf);
203		if (flags & TR_OPT_complement)
204			input_length = complement(tr_buf, input_length);
205		if (argv[idx] != NULL) {
206			if (*argv[idx] == '\0')
207				bb_error_msg_and_die("STRING2 cannot be empty");
208			output_length = expand(argv[idx], output);
209			map(vector, tr_buf, input_length, output, output_length);
210		}
211		for (i = 0; i < input_length; i++)
212			invec[(unsigned char)tr_buf[i]] = TRUE;
213		for (i = 0; i < output_length; i++)
214			outvec[output[i]] = TRUE;
215	}
216
217	for (;;) {
218		/* If we're out of input, flush output and read more input. */
219		if (in_index == read_chars) {
220			if (out_index) {
221				xwrite(STDOUT_FILENO, (char *)output, out_index);
222				out_index = 0;
223			}
224			read_chars = read(STDIN_FILENO, tr_buf, BUFSIZ);
225			if (read_chars <= 0) {
226				if (write(STDOUT_FILENO, (char *)output, out_index) != out_index)
227					bb_perror_msg(bb_msg_write_error);
228				exit(EXIT_SUCCESS);
229			}
230			in_index = 0;
231		}
232		c = tr_buf[in_index++];
233		coded = vector[c];
234		if ((flags & TR_OPT_delete) && invec[c])
235			continue;
236		if ((flags & TR_OPT_squeeze_reps) && last == coded &&
237			(invec[c] || outvec[coded]))
238			continue;
239		output[out_index++] = last = coded;
240	}
241	/* NOTREACHED */
242	return EXIT_SUCCESS;
243}
244