1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35#if 0
36static char sccsid[] = "@(#)parse.c	8.1 (Berkeley) 6/6/93";
37#endif
38#endif /* not lint */
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: src/usr.bin/hexdump/parse.c,v 1.12 2002/09/04 23:29:01 dwmalone Exp $");
41
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hexdump.h"
51
/*
 * Format unit for end-of-data.  rewrite() points this at the format unit
 * that contains a %_A or %_n conversion.
 */
FU *endfu;
53
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	of them to add().  Leading white space is skipped; empty lines and
 *	lines whose first non-blank character is '#' are ignored.  A chunk
 *	read without a newline (a line longer than the buffer, or a final
 *	line with no newline) draws a "line too long" warning and is dropped.
 *	Exits through err() if the file cannot be opened.
 */
void
addfile(char *name)
{
	unsigned char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "%s", name);
	while (fgets(buf, sizeof(buf), fp) != NULL) {
		if ((p = (unsigned char *)strchr(buf, '\n')) == NULL) {
			warnx("line too long");
			/*
			 * Throw away the remainder of the line.  It has to be
			 * read from the format file; reading standard input
			 * here would leave the tail in fp to be parsed as a
			 * separate format and would eat data to be dumped.
			 */
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		*p = '\0';
		for (p = (unsigned char *)buf; *p != '\0' && isspace(*p); ++p)
			continue;
		if (*p == '\0' || *p == '#')
			continue;
		add((const char *)p);
	}
	(void)fclose(fp);
}
78
79void
80add(const char *fmt)
81{
82	unsigned const char *p, *savep;
83	static FS **nextfs;
84	FS *tfs;
85	FU *tfu, **nextfu;
86
87	/* start new linked list of format units */
88	if ((tfs = calloc(1, sizeof(FS))) == NULL)
89		err(1, NULL);
90	if (!fshead)
91		fshead = tfs;
92	else
93		*nextfs = tfs;
94	nextfs = &tfs->nextfs;
95	nextfu = &tfs->nextfu;
96
97	/* take the format string and break it up into format units */
98	for (p = (unsigned const char *)fmt;;) {
99		/* skip leading white space */
100		for (; isspace(*p); ++p);
101		if (!*p)
102			break;
103
104		/* allocate a new format unit and link it in */
105		if ((tfu = calloc(1, sizeof(FU))) == NULL)
106			err(1, NULL);
107		*nextfu = tfu;
108		nextfu = &tfu->nextfu;
109		tfu->reps = 1;
110
111		/* if leading digit, repetition count */
112		if (isdigit(*p)) {
113			for (savep = p; isdigit(*p); ++p);
114			if (!isspace(*p) && *p != '/')
115				badfmt(fmt);
116			/* may overwrite either white space or slash */
117			tfu->reps = atoi((const char *)savep);
118			tfu->flags = F_SETREP;
119			/* skip trailing white space */
120			for (++p; isspace(*p); ++p);
121		}
122
123		/* skip slash and trailing white space */
124		if (*p == '/')
125			while (isspace(*++p));
126
127		/* byte count */
128		if (isdigit(*p)) {
129			for (savep = p; isdigit(*p); ++p);
130			if (!isspace(*p))
131				badfmt(fmt);
132			tfu->bcnt = atoi((const char *)savep);
133			/* skip trailing white space */
134			for (++p; isspace(*p); ++p);
135		}
136
137		/* format */
138		if (*p != '"')
139			badfmt(fmt);
140		for (savep = ++p; *p != '"';)
141			if (*p++ == 0)
142				badfmt(fmt);
143		if (!(tfu->fmt = malloc(p - savep + 1)))
144			err(1, NULL);
145		(void) strncpy(tfu->fmt, (const char *)savep, p - savep);
146		tfu->fmt[p - savep] = '\0';
147		escape(tfu->fmt);
148		p++;
149	}
150}
151
152static const char *spec = ".#-+ 0123456789";
153
154int
155size(FS *fs)
156{
157	FU *fu;
158	int bcnt, cursize;
159	unsigned char *fmt;
160	int prec;
161
162	/* figure out the data block size needed for each format unit */
163	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
164		if (fu->bcnt) {
165			cursize += fu->bcnt * fu->reps;
166			continue;
167		}
168		for (bcnt = prec = 0, fmt = (unsigned char *)fu->fmt; *fmt; ++fmt) {
169			if (*fmt != '%')
170				continue;
171			/*
172			 * skip any special chars -- save precision in
173			 * case it's a %s format.
174			 */
175			while (index(spec + 1, *++fmt));
176			if (*fmt == '.' && isdigit(*++fmt)) {
177				prec = atoi((const char *)fmt);
178				while (isdigit(*++fmt));
179			}
180			switch(*fmt) {
181			case 'c':
182				bcnt += 1;
183				break;
184			case 'd': case 'i': case 'o': case 'u':
185			case 'x': case 'X':
186				bcnt += 4;
187				break;
188			case 'e': case 'E': case 'f': case 'g': case 'G':
189				bcnt += 8;
190				break;
191			case 's':
192				bcnt += prec;
193				break;
194			case '_':
195				switch(*++fmt) {
196				case 'c': case 'p': case 'u':
197					bcnt += 1;
198					break;
199				}
200			}
201		}
202		cursize += bcnt * fu->reps;
203	}
204	return (cursize);
205}
206
207void
208rewrite(FS *fs)
209{
210	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
211	PR *pr, **nextpr = NULL;
212	FU *fu;
213	unsigned char *p1, *p2, *fmtp;
214	char savech, cs[3];
215	int nconv, prec = 0;
216
217	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
218		/*
219		 * Break each format unit into print units; each conversion
220		 * character gets its own.
221		 */
222		for (nconv = 0, fmtp = (unsigned char *)fu->fmt; *fmtp; nextpr = &pr->nextpr) {
223			if ((pr = calloc(1, sizeof(PR))) == NULL)
224				err(1, NULL);
225			if (!fu->nextpr)
226				fu->nextpr = pr;
227			else
228				*nextpr = pr;
229
230			/* Skip preceding text and up to the next % sign. */
231			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
232
233			/* Only text in the string. */
234			if (!*p1) {
235				pr->fmt = (char *)fmtp;
236				pr->flags = F_TEXT;
237				break;
238			}
239
240			/*
241			 * Get precision for %s -- if have a byte count, don't
242			 * need it.
243			 */
244			if (fu->bcnt) {
245				sokay = USEBCNT;
246				/* Skip to conversion character. */
247				for (++p1; index(spec, *p1); ++p1);
248			} else {
249				/* Skip any special chars, field width. */
250				while (index(spec + 1, *++p1));
251				if (*p1 == '.' && isdigit(*++p1)) {
252					sokay = USEPREC;
253					prec = atoi((const char *)p1);
254					while (isdigit(*++p1));
255				} else
256					sokay = NOTOKAY;
257			}
258
259			p2 = p1 + 1;		/* Set end pointer. */
260			cs[0] = *p1;		/* Set conversion string. */
261			cs[1] = '\0';
262
263			/*
264			 * Figure out the byte count for each conversion;
265			 * rewrite the format as necessary, set up blank-
266			 * padding for end of data.
267			 */
268			switch(cs[0]) {
269			case 'c':
270				pr->flags = F_CHAR;
271				switch(fu->bcnt) {
272				case 0: case 1:
273					pr->bcnt = 1;
274					break;
275				default:
276					p1[1] = '\0';
277					badcnt((char *)p1);
278				}
279				break;
280			case 'd': case 'i':
281				pr->flags = F_INT;
282				goto isint;
283			case 'o': case 'u': case 'x': case 'X':
284				pr->flags = F_UINT;
285isint:				cs[2] = '\0';
286				cs[1] = cs[0];
287				cs[0] = 'q';
288				switch(fu->bcnt) {
289				case 0: case 4:
290					pr->bcnt = 4;
291					break;
292				case 1:
293					pr->bcnt = 1;
294					break;
295				case 2:
296					pr->bcnt = 2;
297					break;
298#ifdef __APPLE__
299				case 8:
300					pr->bcnt = 8;
301					break;
302#endif /* __APPLE__ */
303				default:
304					p1[1] = '\0';
305					badcnt((char *)p1);
306				}
307				break;
308			case 'e': case 'E': case 'f': case 'g': case 'G':
309				pr->flags = F_DBL;
310				switch(fu->bcnt) {
311				case 0: case 8:
312					pr->bcnt = 8;
313					break;
314				case 4:
315					pr->bcnt = 4;
316					break;
317				default:
318					if (fu->bcnt == sizeof(long double)) {
319						cs[2] = '\0';
320						cs[1] = cs[0];
321						cs[0] = 'L';
322						pr->bcnt = sizeof(long double);
323					} else {
324						p1[1] = '\0';
325						badcnt((char *)p1);
326					}
327				}
328				break;
329			case 's':
330				pr->flags = F_STR;
331				switch(sokay) {
332				case NOTOKAY:
333					badsfmt();
334				case USEBCNT:
335					pr->bcnt = fu->bcnt;
336					break;
337				case USEPREC:
338					pr->bcnt = prec;
339					break;
340				}
341				break;
342			case '_':
343				++p2;
344				switch(p1[1]) {
345				case 'A':
346					endfu = fu;
347					fu->flags |= F_IGNORE;
348					/* FALLTHROUGH */
349				case 'a':
350					pr->flags = F_ADDRESS;
351					++p2;
352					switch(p1[2]) {
353					case 'd': case 'o': case'x':
354						cs[0] = 'q';
355						cs[1] = p1[2];
356						cs[2] = '\0';
357						break;
358					default:
359						p1[3] = '\0';
360						badconv((char *)p1);
361					}
362					break;
363				case 'c':
364					pr->flags = F_C;
365					/* cs[0] = 'c';	set in conv_c */
366					goto isint2;
367				case 'p':
368					pr->flags = F_P;
369					cs[0] = 'c';
370					goto isint2;
371				case 'u':
372					pr->flags = F_U;
373					/* cs[0] = 'c';	set in conv_u */
374isint2:					switch(fu->bcnt) {
375					case 0: case 1:
376						pr->bcnt = 1;
377						break;
378					default:
379						p1[2] = '\0';
380						badcnt((char *)p1);
381					}
382					break;
383				case 'n': /* Force -A n to dump extra blank line like default od behavior */
384					endfu = fu;
385					fu->flags = F_IGNORE;
386					pr->flags = F_TEXT;
387					fmtp = (unsigned char *)"\n";
388					cs[0] = '\0';
389					break;
390				default:
391					p1[2] = '\0';
392					badconv((char *)p1);
393				}
394				break;
395			default:
396				p1[1] = '\0';
397				badconv((char *)p1);
398			}
399
400			/*
401			 * Copy to PR format string, set conversion character
402			 * pointer, update original.
403			 */
404			savech = *p2;
405			p1[0] = '\0';
406			if ((pr->fmt = calloc(1, strlen((const char *)fmtp) + 2)) == NULL)
407				err(1, NULL);
408			(void)strcpy(pr->fmt, (const char *)fmtp);
409			(void)strcat(pr->fmt, cs);
410			*p2 = savech;
411			pr->cchar = pr->fmt + (p1 - fmtp);
412			fmtp = p2;
413
414			/* Only one conversion character if byte count. */
415			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
416				errx(1, "byte count with multiple conversion characters");
417		}
418		/*
419		 * If format unit byte count not specified, figure it out
420		 * so can adjust rep count later.
421		 */
422		if (!fu->bcnt)
423			for (pr = fu->nextpr; pr; pr = pr->nextpr)
424				fu->bcnt += pr->bcnt;
425	}
426	/*
427	 * If the format string interprets any data at all, and it's
428	 * not the same as the blocksize, and its last format unit
429	 * interprets any data at all, and has no iteration count,
430	 * repeat it as necessary.
431	 *
432	 * If, rep count is greater than 1, no trailing whitespace
433	 * gets output from the last iteration of the format unit.
434	 */
435	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
436		if (!fu->nextfu && fs->bcnt < blocksize &&
437		    !(fu->flags&F_SETREP) && fu->bcnt)
438			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
439		if (fu->reps > 1) {
440			for (pr = fu->nextpr;; pr = pr->nextpr)
441				if (!pr->nextpr)
442					break;
443			for (p1 = (unsigned char *)pr->fmt, p2 = NULL; *p1; ++p1)
444				p2 = isspace(*p1) ? p1 : NULL;
445			if (p2)
446				pr->nospace = (char *)p2;
447		}
448	}
449#ifdef DEBUG
450	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
451		(void)printf("fmt:");
452		for (pr = fu->nextpr; pr; pr = pr->nextpr)
453			(void)printf(" {%s}", pr->fmt);
454		(void)printf("\n");
455	}
456#endif
457}
458
/*
 * escape --
 *	Decode backslash escape sequences in the NUL-terminated string `p1',
 *	in place.  \a, \b, \f, \n, \r, \t and \v become the corresponding
 *	control characters; a backslash followed by any other character
 *	yields that character; a lone backslash at the very end of the
 *	string is kept as it is.  The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *p2;

	/*
	 * alphabetic escape sequences have to be done in place:
	 * p1 reads, p2 writes, and p2 never gets ahead of p1.
	 */
	for (p2 = p1; *p1 != '\0'; ++p1, ++p2) {
		if (*p1 != '\\') {
			/*
			 * Ordinary characters must be moved down too, or
			 * everything after the first escape sequence would
			 * stay one position too far to the right.
			 */
			*p2 = *p1;
			continue;
		}
		switch (*++p1) {
		case '\0':
			/* Trailing backslash: keep it, and stop at the NUL. */
			*p2++ = '\\';
			*p2 = '\0';
			return;
		case 'a':
		     /* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
	*p2 = '\0';
}
500
/*
 * badcnt --
 *	Print "<s>: bad byte count" and exit with status 1.  rewrite() calls
 *	this with `s' pointing at the offending conversion, after cutting the
 *	format off right behind it.
 */
void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
506
/*
 * badsfmt --
 *	Complain that a %s conversion has neither a precision nor a byte
 *	count, and exit with status 1.
 */
void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
512
/*
 * badfmt --
 *	Print the whole format string `fmt', in double quotes, followed by
 *	": bad format", and exit with status 1.
 */
void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
518
/*
 * badconv --
 *	Print "%<ch>: bad conversion character" and exit with status 1.
 *	`ch' is a string, not a single character: the message supplies a
 *	'%' and then prints `ch' in full.
 */
void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}
524