1#include <sys/cdefs.h>
2__FBSDID("$FreeBSD$");
3
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6
7#include "split.ih"
8
9/*
10 - split - divide a string into fields, like awk split()
11 == int split(char *string, char *fields[], int nfields, char *sep);
12 - fields: list is not NULL-terminated
13 - nfields: number of entries available in fields[]
14 - sep: "" white, "c" single char, "ab" [ab]+
15 */
/*
 * Split string in place into fields, storing a pointer to the start of
 * each field in fields[] and overwriting the separator that ends a stored
 * field with '\0'.
 *
 * sep selects the mode:
 *   ""    fields are separated by runs of blanks and tabs; leading and
 *         trailing white space is ignored
 *   "c"   every occurrence of c ends a field, so adjacent separators
 *         produce empty fields
 *   else  any run of characters from sep ends a field
 *
 * At most nfields pointers are stored.  When the string holds more fields
 * than that, the last stored field is left holding the unsplit remainder
 * and the extra fields are only counted.  If nfields is less than 1,
 * nothing is stored in fields[] at all and the call merely counts.
 *
 * Returns the number of fields found, including any that did not fit;
 * 0 for an empty string (or, in white-space mode, an all-blank one).
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp;
	char *sepp;
	int trimtrail;
	char *scratch[1];	/* stand-in vector for count-only calls */

	/*
	 * The one- and two-separator loops below store a field pointer
	 * before they look at the remaining capacity, so they need room
	 * for at least one entry.  When the caller offers none, aim them
	 * at a private slot: the count returned does not depend on the
	 * vector size, and the caller's array is never written.
	 */
	if (nfields < 1) {
		fields = scratch;
		nfields = 1;
	}
	fp = fields;

	/* white space */
	if (sepc == '\0') {
		/* skip leading blanks and tabs */
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;	/* fn counts free slots down to 0 */
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;	/* last slot keeps the unsplit rest */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;	/* fn counts free slots down to 0 */
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/*
					 * In white-space mode a final empty
					 * field is just trailing blanks, so
					 * do not count it.
					 */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;	/* last slot keeps the unsplit rest */
			*(p-1) = '\0';
			/* swallow the rest of the separator run */
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;		/* back onto the terminating '\0' */
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;		/* first trailing blank, or the '\0' */
			if (*p != '\0') {
				/*
				 * The trailing blanks were counted as one
				 * extra field.  If that was the only
				 * overflow, they sit at the end of the last
				 * stored field, so cut them off there.
				 */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;			/* here fn counts fields upward */
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		/* terminate only fields that precede the last slot */
		if (fn < nfields)
			*(p-1) = '\0';
		/* swallow the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
143
144#ifdef TEST_SPLIT
145
146
147/*
148 * test program
149 * pgm		runs regression
150 * pgm sep	splits stdin lines by sep
151 * pgm str sep	splits str by sep
152 * pgm str sep n	splits str by sep n times
153 */
/*
 * Command-line driver for the split() test program.  The argument count
 * selects the mode:
 *
 *   pgm			run the built-in regression table
 *   pgm sep		split each line of stdin by sep and print it
 *   pgm str sep		split str by sep and print the result
 *   pgm str sep n	split a fresh copy of str by sep, n times, silently
 *   pgm str sep n x	only copy str n times without splitting
 *			(presumably a timing baseline for the mode above)
 *
 * Exits with status 0, or 1 if str does not fit the internal buffer.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	long n;
#	define	MNF	10
	char *fields[MNF];

	if (argc > 3) {
		/* buf is fixed-size; refuse the input rather than overrun it */
		if (strlen(argv[1]) >= sizeof(buf)) {
			fprintf(stderr, "%s: string too long (limit %lu)\n",
				argv[0], (unsigned long)(sizeof(buf) - 1));
			exit(1);
		}
		n = strtol(argv[3], NULL, 10);
		if (argc > 4) {
			for (; n > 0; n--) {
				(void) strcpy(buf, argv[1]);
			}
		} else {
			for (; n > 0; n--) {
				/* split() scribbles on its input: recopy */
				(void) strcpy(buf, argv[1]);
				(void) split(buf, fields, MNF, argv[2]);
			}
		}
	} else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Drop the newline if this chunk has one; an
			 * over-long or unterminated final line has none
			 * and must keep its last character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
183
/*
 * Split string by seps into a small local vector and print the outcome.
 * The string is modified in place by split().
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *pieces[NF];
	int count = split(string, pieces, NF, seps);

	print(count, NF, pieces);
}
194
/*
 * Print the result of a split() call on one line of stdout.
 *
 * nf:     field count returned by split()
 * nfp:    capacity of fields[]; at most this many fields are shown
 * fields: the field vector filled in by split()
 *
 * Output is the count, a colon and a tab, then the quoted fields
 * separated by ", ".  When nf exceeds nfp the list ends in a dangling
 * ", " because more fields were counted than can be shown.  The line is
 * always newline-terminated, including when no fields are printed.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
	/*
	 * The loop emits the newline only after field number nf, which it
	 * never reaches when there are no fields or when the list was cut
	 * short at nfp; finish the line here in those cases.
	 */
	if (nf < 1 || nf > nfp)
		printf("\n");
}
206
#define	RNF	5		/* some table entries know this */

/*
 * Regression cases for split(), ended by a row whose str is NULL.
 * regress() splits each str with a vector of RNF entries, so rows that
 * expect more than RNF fields list the unsplit remainder as the last one.
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator argument passed to split() */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected contents of the stored fields */
} tests[] = {
	/* one separator character */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separator characters */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separator characters */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separator characters */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator character given twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator string: white-space mode */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
275
276void
277regress(void)
278{
279	char buf[512];
280	int n;
281	char *fields[RNF+1];
282	int nf;
283	int i;
284	int printit;
285	char *f;
286
287	for (n = 0; tests[n].str != NULL; n++) {
288		(void) strcpy(buf, tests[n].str);
289		fields[RNF] = NULL;
290		nf = split(buf, fields, RNF, tests[n].seps);
291		printit = 0;
292		if (nf != tests[n].nf) {
293			printf("split `%s' by `%s' gave %d fields, not %d\n",
294				tests[n].str, tests[n].seps, nf, tests[n].nf);
295			printit = 1;
296		} else if (fields[RNF] != NULL) {
297			printf("split() went beyond array end\n");
298			printit = 1;
299		} else {
300			for (i = 0; i < nf && i < RNF; i++) {
301				f = fields[i];
302				if (f == NULL)
303					f = "(NULL)";
304				if (strcmp(f, tests[n].fi[i]) != 0) {
305					printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
306						tests[n].str, tests[n].seps,
307						i, fields[i], tests[n].fi[i]);
308					printit = 1;
309				}
310			}
311		}
312		if (printit)
313			print(nf, RNF, fields);
314	}
315}
316#endif
317