/* parse.c revision 231578 */
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1989, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 4. Neither the name of the University nor the names of its contributors 141590Srgrimes * may be used to endorse or promote products derived from this software 151590Srgrimes * without specific prior written permission. 161590Srgrimes * 171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271590Srgrimes * SUCH DAMAGE. 
281590Srgrimes */ 291590Srgrimes 301590Srgrimes#ifndef lint 3127315Scharnier#if 0 321590Srgrimesstatic char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; 3327315Scharnier#endif 341590Srgrimes#endif /* not lint */ 3599112Sobrien#include <sys/cdefs.h> 3699112Sobrien__FBSDID("$FreeBSD: stable/9/usr.bin/hexdump/parse.c 231578 2012-02-13 10:24:49Z tijl $"); 371590Srgrimes 381590Srgrimes#include <sys/types.h> 391590Srgrimes 4027315Scharnier#include <err.h> 411590Srgrimes#include <fcntl.h> 421590Srgrimes#include <stdio.h> 431590Srgrimes#include <stdlib.h> 441590Srgrimes#include <ctype.h> 451590Srgrimes#include <string.h> 461590Srgrimes#include "hexdump.h" 471590Srgrimes 481590SrgrimesFU *endfu; /* format at end-of-data */ 491590Srgrimes 501590Srgrimesvoid 51102944Sdwmaloneaddfile(char *name) 521590Srgrimes{ 53102944Sdwmalone unsigned char *p; 541590Srgrimes FILE *fp; 551590Srgrimes int ch; 561590Srgrimes char buf[2048 + 1]; 571590Srgrimes 581590Srgrimes if ((fp = fopen(name, "r")) == NULL) 5927315Scharnier err(1, "%s", name); 601590Srgrimes while (fgets(buf, sizeof(buf), fp)) { 611590Srgrimes if (!(p = index(buf, '\n'))) { 6227315Scharnier warnx("line too long"); 631590Srgrimes while ((ch = getchar()) != '\n' && ch != EOF); 641590Srgrimes continue; 651590Srgrimes } 661590Srgrimes *p = '\0'; 671590Srgrimes for (p = buf; *p && isspace(*p); ++p); 681590Srgrimes if (!*p || *p == '#') 691590Srgrimes continue; 701590Srgrimes add(p); 711590Srgrimes } 721590Srgrimes (void)fclose(fp); 731590Srgrimes} 741590Srgrimes 751590Srgrimesvoid 76102944Sdwmaloneadd(const char *fmt) 771590Srgrimes{ 7887203Smarkm unsigned const char *p, *savep; 791590Srgrimes static FS **nextfs; 801590Srgrimes FS *tfs; 811590Srgrimes FU *tfu, **nextfu; 821590Srgrimes 831590Srgrimes /* start new linked list of format units */ 8480290Sobrien if ((tfs = calloc(1, sizeof(FS))) == NULL) 8580290Sobrien err(1, NULL); 861590Srgrimes if (!fshead) 871590Srgrimes fshead = tfs; 881590Srgrimes else 891590Srgrimes *nextfs 
= tfs; 901590Srgrimes nextfs = &tfs->nextfs; 911590Srgrimes nextfu = &tfs->nextfu; 921590Srgrimes 931590Srgrimes /* take the format string and break it up into format units */ 941590Srgrimes for (p = fmt;;) { 951590Srgrimes /* skip leading white space */ 961590Srgrimes for (; isspace(*p); ++p); 971590Srgrimes if (!*p) 981590Srgrimes break; 991590Srgrimes 1001590Srgrimes /* allocate a new format unit and link it in */ 10180290Sobrien if ((tfu = calloc(1, sizeof(FU))) == NULL) 10280290Sobrien err(1, NULL); 1031590Srgrimes *nextfu = tfu; 1041590Srgrimes nextfu = &tfu->nextfu; 1051590Srgrimes tfu->reps = 1; 1061590Srgrimes 1071590Srgrimes /* if leading digit, repetition count */ 1081590Srgrimes if (isdigit(*p)) { 1091590Srgrimes for (savep = p; isdigit(*p); ++p); 1101590Srgrimes if (!isspace(*p) && *p != '/') 1111590Srgrimes badfmt(fmt); 1121590Srgrimes /* may overwrite either white space or slash */ 1131590Srgrimes tfu->reps = atoi(savep); 1141590Srgrimes tfu->flags = F_SETREP; 1151590Srgrimes /* skip trailing white space */ 1161590Srgrimes for (++p; isspace(*p); ++p); 1171590Srgrimes } 1181590Srgrimes 1191590Srgrimes /* skip slash and trailing white space */ 1201590Srgrimes if (*p == '/') 1211590Srgrimes while (isspace(*++p)); 1221590Srgrimes 1231590Srgrimes /* byte count */ 1241590Srgrimes if (isdigit(*p)) { 1251590Srgrimes for (savep = p; isdigit(*p); ++p); 1261590Srgrimes if (!isspace(*p)) 1271590Srgrimes badfmt(fmt); 1281590Srgrimes tfu->bcnt = atoi(savep); 1291590Srgrimes /* skip trailing white space */ 1301590Srgrimes for (++p; isspace(*p); ++p); 1311590Srgrimes } 1321590Srgrimes 1331590Srgrimes /* format */ 1341590Srgrimes if (*p != '"') 1351590Srgrimes badfmt(fmt); 1361590Srgrimes for (savep = ++p; *p != '"';) 1371590Srgrimes if (*p++ == 0) 1381590Srgrimes badfmt(fmt); 1391590Srgrimes if (!(tfu->fmt = malloc(p - savep + 1))) 14080290Sobrien err(1, NULL); 141194796Sdelphij (void) strlcpy(tfu->fmt, savep, p - savep + 1); 1421590Srgrimes escape(tfu->fmt); 
1431590Srgrimes p++; 1441590Srgrimes } 1451590Srgrimes} 1461590Srgrimes 14791840Sobrienstatic const char *spec = ".#-+ 0123456789"; 1481590Srgrimes 1491590Srgrimesint 150102944Sdwmalonesize(FS *fs) 1511590Srgrimes{ 152102944Sdwmalone FU *fu; 153102944Sdwmalone int bcnt, cursize; 154102944Sdwmalone unsigned char *fmt; 1551590Srgrimes int prec; 1561590Srgrimes 1571590Srgrimes /* figure out the data block size needed for each format unit */ 1581590Srgrimes for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 1591590Srgrimes if (fu->bcnt) { 1601590Srgrimes cursize += fu->bcnt * fu->reps; 1611590Srgrimes continue; 1621590Srgrimes } 1631590Srgrimes for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 1641590Srgrimes if (*fmt != '%') 1651590Srgrimes continue; 1661590Srgrimes /* 1671590Srgrimes * skip any special chars -- save precision in 1681590Srgrimes * case it's a %s format. 1691590Srgrimes */ 1701590Srgrimes while (index(spec + 1, *++fmt)); 1711590Srgrimes if (*fmt == '.' && isdigit(*++fmt)) { 1721590Srgrimes prec = atoi(fmt); 1731590Srgrimes while (isdigit(*++fmt)); 1741590Srgrimes } 1751590Srgrimes switch(*fmt) { 1761590Srgrimes case 'c': 1771590Srgrimes bcnt += 1; 1781590Srgrimes break; 1791590Srgrimes case 'd': case 'i': case 'o': case 'u': 1801590Srgrimes case 'x': case 'X': 1811590Srgrimes bcnt += 4; 1821590Srgrimes break; 1831590Srgrimes case 'e': case 'E': case 'f': case 'g': case 'G': 1841590Srgrimes bcnt += 8; 1851590Srgrimes break; 1861590Srgrimes case 's': 1871590Srgrimes bcnt += prec; 1881590Srgrimes break; 1891590Srgrimes case '_': 1901590Srgrimes switch(*++fmt) { 1911590Srgrimes case 'c': case 'p': case 'u': 1921590Srgrimes bcnt += 1; 1931590Srgrimes break; 1941590Srgrimes } 1951590Srgrimes } 1961590Srgrimes } 1971590Srgrimes cursize += bcnt * fu->reps; 1981590Srgrimes } 1991590Srgrimes return (cursize); 2001590Srgrimes} 2011590Srgrimes 2021590Srgrimesvoid 203102944Sdwmalonerewrite(FS *fs) 2041590Srgrimes{ 2051590Srgrimes enum { NOTOKAY, USEBCNT, 
USEPREC } sokay; 206102944Sdwmalone PR *pr, **nextpr; 207102944Sdwmalone FU *fu; 20830921Sache unsigned char *p1, *p2, *fmtp; 20930921Sache char savech, cs[3]; 2101590Srgrimes int nconv, prec; 211161132Smaxim size_t len; 2121590Srgrimes 213132541Sjohan nextpr = NULL; 214132541Sjohan prec = 0; 215132541Sjohan 2161590Srgrimes for (fu = fs->nextfu; fu; fu = fu->nextfu) { 2171590Srgrimes /* 2181590Srgrimes * Break each format unit into print units; each conversion 2191590Srgrimes * character gets its own. 2201590Srgrimes */ 2211590Srgrimes for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 22280290Sobrien if ((pr = calloc(1, sizeof(PR))) == NULL) 22380290Sobrien err(1, NULL); 2241590Srgrimes if (!fu->nextpr) 2251590Srgrimes fu->nextpr = pr; 2261590Srgrimes else 2271590Srgrimes *nextpr = pr; 2281590Srgrimes 2291590Srgrimes /* Skip preceding text and up to the next % sign. */ 2301590Srgrimes for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 2311590Srgrimes 2321590Srgrimes /* Only text in the string. */ 2331590Srgrimes if (!*p1) { 2341590Srgrimes pr->fmt = fmtp; 2351590Srgrimes pr->flags = F_TEXT; 2361590Srgrimes break; 2371590Srgrimes } 2381590Srgrimes 2391590Srgrimes /* 2401590Srgrimes * Get precision for %s -- if have a byte count, don't 2411590Srgrimes * need it. 2421590Srgrimes */ 2431590Srgrimes if (fu->bcnt) { 2441590Srgrimes sokay = USEBCNT; 2451590Srgrimes /* Skip to conversion character. */ 2461590Srgrimes for (++p1; index(spec, *p1); ++p1); 2471590Srgrimes } else { 2481590Srgrimes /* Skip any special chars, field width. */ 2491590Srgrimes while (index(spec + 1, *++p1)); 2501590Srgrimes if (*p1 == '.' && isdigit(*++p1)) { 2511590Srgrimes sokay = USEPREC; 2521590Srgrimes prec = atoi(p1); 2531590Srgrimes while (isdigit(*++p1)); 2541590Srgrimes } else 2551590Srgrimes sokay = NOTOKAY; 2561590Srgrimes } 2571590Srgrimes 258231578Stijl p2 = *p1 ? p1 + 1 : p1; /* Set end pointer -- make sure 259231578Stijl * that it's non-NUL/-NULL first 260231578Stijl * though. 
*/ 2611590Srgrimes cs[0] = *p1; /* Set conversion string. */ 2621590Srgrimes cs[1] = '\0'; 2631590Srgrimes 2641590Srgrimes /* 2651590Srgrimes * Figure out the byte count for each conversion; 2661590Srgrimes * rewrite the format as necessary, set up blank- 2671590Srgrimes * padding for end of data. 2681590Srgrimes */ 2691590Srgrimes switch(cs[0]) { 2701590Srgrimes case 'c': 2711590Srgrimes pr->flags = F_CHAR; 2721590Srgrimes switch(fu->bcnt) { 2731590Srgrimes case 0: case 1: 2741590Srgrimes pr->bcnt = 1; 2751590Srgrimes break; 2761590Srgrimes default: 2771590Srgrimes p1[1] = '\0'; 2781590Srgrimes badcnt(p1); 2791590Srgrimes } 2801590Srgrimes break; 2811590Srgrimes case 'd': case 'i': 2821590Srgrimes pr->flags = F_INT; 2831590Srgrimes goto isint; 2841590Srgrimes case 'o': case 'u': case 'x': case 'X': 2851590Srgrimes pr->flags = F_UINT; 2861590Srgrimesisint: cs[2] = '\0'; 2871590Srgrimes cs[1] = cs[0]; 2881590Srgrimes cs[0] = 'q'; 2891590Srgrimes switch(fu->bcnt) { 2901590Srgrimes case 0: case 4: 2911590Srgrimes pr->bcnt = 4; 2921590Srgrimes break; 2931590Srgrimes case 1: 2941590Srgrimes pr->bcnt = 1; 2951590Srgrimes break; 2961590Srgrimes case 2: 2971590Srgrimes pr->bcnt = 2; 2981590Srgrimes break; 2991590Srgrimes default: 3001590Srgrimes p1[1] = '\0'; 3011590Srgrimes badcnt(p1); 3021590Srgrimes } 3031590Srgrimes break; 3041590Srgrimes case 'e': case 'E': case 'f': case 'g': case 'G': 3051590Srgrimes pr->flags = F_DBL; 3061590Srgrimes switch(fu->bcnt) { 3071590Srgrimes case 0: case 8: 3081590Srgrimes pr->bcnt = 8; 3091590Srgrimes break; 3101590Srgrimes case 4: 3111590Srgrimes pr->bcnt = 4; 3121590Srgrimes break; 3131590Srgrimes default: 31496795Stjr if (fu->bcnt == sizeof(long double)) { 31596795Stjr cs[2] = '\0'; 31696795Stjr cs[1] = cs[0]; 31796795Stjr cs[0] = 'L'; 31896795Stjr pr->bcnt = sizeof(long double); 31996795Stjr } else { 32096795Stjr p1[1] = '\0'; 32196795Stjr badcnt(p1); 32296795Stjr } 3231590Srgrimes } 3241590Srgrimes break; 3251590Srgrimes case 's': 
3261590Srgrimes pr->flags = F_STR; 3271590Srgrimes switch(sokay) { 3281590Srgrimes case NOTOKAY: 3291590Srgrimes badsfmt(); 3301590Srgrimes case USEBCNT: 3311590Srgrimes pr->bcnt = fu->bcnt; 3321590Srgrimes break; 3331590Srgrimes case USEPREC: 3341590Srgrimes pr->bcnt = prec; 3351590Srgrimes break; 3361590Srgrimes } 3371590Srgrimes break; 3381590Srgrimes case '_': 3391590Srgrimes ++p2; 3401590Srgrimes switch(p1[1]) { 3411590Srgrimes case 'A': 3421590Srgrimes endfu = fu; 3431590Srgrimes fu->flags |= F_IGNORE; 3441590Srgrimes /* FALLTHROUGH */ 3451590Srgrimes case 'a': 3461590Srgrimes pr->flags = F_ADDRESS; 3471590Srgrimes ++p2; 3481590Srgrimes switch(p1[2]) { 3491590Srgrimes case 'd': case 'o': case'x': 3501590Srgrimes cs[0] = 'q'; 3511590Srgrimes cs[1] = p1[2]; 3521590Srgrimes cs[2] = '\0'; 3531590Srgrimes break; 3541590Srgrimes default: 3551590Srgrimes p1[3] = '\0'; 3561590Srgrimes badconv(p1); 3571590Srgrimes } 3581590Srgrimes break; 3591590Srgrimes case 'c': 3601590Srgrimes pr->flags = F_C; 3611590Srgrimes /* cs[0] = 'c'; set in conv_c */ 3621590Srgrimes goto isint2; 3631590Srgrimes case 'p': 3641590Srgrimes pr->flags = F_P; 3651590Srgrimes cs[0] = 'c'; 3661590Srgrimes goto isint2; 3671590Srgrimes case 'u': 3681590Srgrimes pr->flags = F_U; 3691590Srgrimes /* cs[0] = 'c'; set in conv_u */ 3701590Srgrimesisint2: switch(fu->bcnt) { 3711590Srgrimes case 0: case 1: 3721590Srgrimes pr->bcnt = 1; 3731590Srgrimes break; 3741590Srgrimes default: 3751590Srgrimes p1[2] = '\0'; 3761590Srgrimes badcnt(p1); 3771590Srgrimes } 3781590Srgrimes break; 3791590Srgrimes default: 3801590Srgrimes p1[2] = '\0'; 3811590Srgrimes badconv(p1); 3821590Srgrimes } 3831590Srgrimes break; 3841590Srgrimes default: 3851590Srgrimes p1[1] = '\0'; 3861590Srgrimes badconv(p1); 3871590Srgrimes } 3881590Srgrimes 3891590Srgrimes /* 3901590Srgrimes * Copy to PR format string, set conversion character 3911590Srgrimes * pointer, update original. 
3921590Srgrimes */ 3931590Srgrimes savech = *p2; 3941590Srgrimes p1[0] = '\0'; 395161132Smaxim len = strlen(fmtp) + strlen(cs) + 1; 396161132Smaxim if ((pr->fmt = calloc(1, len)) == NULL) 39780290Sobrien err(1, NULL); 398161132Smaxim snprintf(pr->fmt, len, "%s%s", fmtp, cs); 3991590Srgrimes *p2 = savech; 4001590Srgrimes pr->cchar = pr->fmt + (p1 - fmtp); 4011590Srgrimes fmtp = p2; 4021590Srgrimes 4031590Srgrimes /* Only one conversion character if byte count. */ 4041590Srgrimes if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 40527315Scharnier errx(1, "byte count with multiple conversion characters"); 4061590Srgrimes } 4071590Srgrimes /* 4081590Srgrimes * If format unit byte count not specified, figure it out 4091590Srgrimes * so can adjust rep count later. 4101590Srgrimes */ 4111590Srgrimes if (!fu->bcnt) 4121590Srgrimes for (pr = fu->nextpr; pr; pr = pr->nextpr) 4131590Srgrimes fu->bcnt += pr->bcnt; 4141590Srgrimes } 4151590Srgrimes /* 4161590Srgrimes * If the format string interprets any data at all, and it's 4171590Srgrimes * not the same as the blocksize, and its last format unit 4181590Srgrimes * interprets any data at all, and has no iteration count, 4191590Srgrimes * repeat it as necessary. 4201590Srgrimes * 4211590Srgrimes * If, rep count is greater than 1, no trailing whitespace 4221590Srgrimes * gets output from the last iteration of the format unit. 4231590Srgrimes */ 42497329Stjr for (fu = fs->nextfu; fu; fu = fu->nextfu) { 4251590Srgrimes if (!fu->nextfu && fs->bcnt < blocksize && 4261590Srgrimes !(fu->flags&F_SETREP) && fu->bcnt) 4271590Srgrimes fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 4281590Srgrimes if (fu->reps > 1) { 4291590Srgrimes for (pr = fu->nextpr;; pr = pr->nextpr) 4301590Srgrimes if (!pr->nextpr) 4311590Srgrimes break; 4321590Srgrimes for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 4331590Srgrimes p2 = isspace(*p1) ? 
p1 : NULL; 4341590Srgrimes if (p2) 4351590Srgrimes pr->nospace = p2; 4361590Srgrimes } 4371590Srgrimes } 4381590Srgrimes#ifdef DEBUG 4391590Srgrimes for (fu = fs->nextfu; fu; fu = fu->nextfu) { 4401590Srgrimes (void)printf("fmt:"); 4411590Srgrimes for (pr = fu->nextpr; pr; pr = pr->nextpr) 4421590Srgrimes (void)printf(" {%s}", pr->fmt); 4431590Srgrimes (void)printf("\n"); 4441590Srgrimes } 4451590Srgrimes#endif 4461590Srgrimes} 4471590Srgrimes 4481590Srgrimesvoid 449102944Sdwmaloneescape(char *p1) 4501590Srgrimes{ 451102944Sdwmalone char *p2; 4521590Srgrimes 4531590Srgrimes /* alphabetic escape sequences have to be done in place */ 454231578Stijl for (p2 = p1;; p1++, p2++) { 455231578Stijl if (*p1 == '\\') { 456231578Stijl p1++; 457231578Stijl switch(*p1) { 458231578Stijl case '\0': 459231578Stijl *p2 = '\\'; 460231578Stijl *++p2 = '\0'; 461231578Stijl return; 4621590Srgrimes case 'a': 4631590Srgrimes /* *p2 = '\a'; */ 4641590Srgrimes *p2 = '\007'; 4651590Srgrimes break; 4661590Srgrimes case 'b': 4671590Srgrimes *p2 = '\b'; 4681590Srgrimes break; 4691590Srgrimes case 'f': 4701590Srgrimes *p2 = '\f'; 4711590Srgrimes break; 4721590Srgrimes case 'n': 4731590Srgrimes *p2 = '\n'; 4741590Srgrimes break; 4751590Srgrimes case 'r': 4761590Srgrimes *p2 = '\r'; 4771590Srgrimes break; 4781590Srgrimes case 't': 4791590Srgrimes *p2 = '\t'; 4801590Srgrimes break; 4811590Srgrimes case 'v': 4821590Srgrimes *p2 = '\v'; 4831590Srgrimes break; 4841590Srgrimes default: 4851590Srgrimes *p2 = *p1; 4861590Srgrimes break; 4871590Srgrimes } 488231578Stijl } else { 489231578Stijl *p2 = *p1; 490231578Stijl if (*p1 == '\0') 491231578Stijl return; 492231578Stijl } 4931590Srgrimes } 4941590Srgrimes} 4951590Srgrimes 4961590Srgrimesvoid 497102944Sdwmalonebadcnt(char *s) 4981590Srgrimes{ 49927315Scharnier errx(1, "%s: bad byte count", s); 5001590Srgrimes} 5011590Srgrimes 5021590Srgrimesvoid 503102944Sdwmalonebadsfmt(void) 5041590Srgrimes{ 50527315Scharnier errx(1, "%%s: requires a precision or a 
byte count"); 5061590Srgrimes} 5071590Srgrimes 5081590Srgrimesvoid 509102944Sdwmalonebadfmt(const char *fmt) 5101590Srgrimes{ 51127315Scharnier errx(1, "\"%s\": bad format", fmt); 5121590Srgrimes} 5131590Srgrimes 5141590Srgrimesvoid 515102944Sdwmalonebadconv(char *ch) 5161590Srgrimes{ 51727315Scharnier errx(1, "%%%s: bad conversion character", ch); 5181590Srgrimes} 519