/* interp_parse.c revision 44570 */
1290001Sglebius/* 2290001Sglebius * Redistribution and use in source and binary forms, with or without 3290001Sglebius * modification, are permitted provided that the following conditions 4290001Sglebius * are met: 5290001Sglebius * 1. Redistributions of source code must retain the above copyright 6290001Sglebius * notice, this list of conditions and the following disclaimer. 7290001Sglebius * 2. Redistributions in binary form must reproduce the above copyright 8290001Sglebius * notice, this list of conditions and the following disclaimer in the 9290001Sglebius * documentation and/or other materials provided with the distribution. 10290001Sglebius * 11290001Sglebius * Jordan K. Hubbard 12290001Sglebius * 29 August 1998 13290001Sglebius * 14290001Sglebius * $Id: interp_parse.c,v 1.6 1999/01/13 08:11:41 msmith Exp $ 15290001Sglebius * 16290001Sglebius * The meat of the simple parser. 17290001Sglebius */ 18290001Sglebius 19290001Sglebius#include <stand.h> 20290001Sglebius#include <string.h> 21290001Sglebius 22290001Sglebius/* Forward decls */ 23290001Sglebiusextern char *backslash(char *str); 24290001Sglebius 25290001Sglebiusstatic void clean(void); 26290001Sglebiusstatic int insert(int *argcp, char *buf); 27290001Sglebiusstatic char *variable_lookup(char *name); 28290001Sglebius 29290001Sglebius#define PARSE_BUFSIZE 1024 /* maximum size of one element */ 30290001Sglebius#define MAXARGS 20 /* maximum number of elements */ 31290001Sglebiusstatic char *args[MAXARGS]; 32290001Sglebius 33290001Sglebius/* 34290001Sglebius * parse: accept a string of input and "parse" it for backslash 35290001Sglebius * substitutions and environment variable expansions (${var}), 36290001Sglebius * returning an argc/argv style vector of whitespace separated 37290001Sglebius * arguments. Returns 0 on success, 1 on failure (ok, ok, so I 38290001Sglebius * wimped-out on the error codes! :). 
39290001Sglebius * 40290001Sglebius * Note that the argv array returned must be freed by the caller, but 41290001Sglebius * we own the space allocated for arguments and will free that on next 42290001Sglebius * invocation. This allows argv consumers to modify the array if 43290001Sglebius * required. 44290001Sglebius * 45290001Sglebius * NB: environment variables that expand to more than one whitespace 46290001Sglebius * separated token will be returned as a single argv[] element, not 47290001Sglebius * split in turn. Expanded text is also immune to further backslash 48290001Sglebius * elimination or expansion since this is a one-pass, non-recursive 49290001Sglebius * parser. You didn't specify more than this so if you want more, ask 50290001Sglebius * me. - jkh 51290001Sglebius */ 52290001Sglebius 53290001Sglebius#define PARSE_FAIL(expr) \ 54290001Sglebiusif (expr) { \ 55290001Sglebius printf("fail at line %d\n", __LINE__); \ 56290001Sglebius clean(); \ 57290001Sglebius free(copy); \ 58290001Sglebius free(buf); \ 59290001Sglebius return 1; \ 60290001Sglebius} 61290001Sglebius 62290001Sglebius/* Accept the usual delimiters for a variable, returning counterpart */ 63290001Sglebiusstatic char 64290001Sglebiusisdelim(char ch) 65290001Sglebius{ 66290001Sglebius if (ch == '{') 67290001Sglebius return '}'; 68290001Sglebius else if (ch == '(') 69290001Sglebius return ')'; 70290001Sglebius return '\0'; 71290001Sglebius} 72290001Sglebius 73290001Sglebiusstatic int 74290001Sglebiusisquote(char ch) 75290001Sglebius{ 76290001Sglebius return (ch == '\'' || ch == '"'); 77290001Sglebius} 78290001Sglebius 79290001Sglebiusint 80290001Sglebiusparse(int *argc, char ***argv, char *str) 81290001Sglebius{ 82290001Sglebius int ac; 83290001Sglebius char *val, *p, *q, *copy = NULL; 84290001Sglebius int i = 0; 85290001Sglebius char token, tmp, quote, *buf; 86290001Sglebius enum { STR, VAR, WHITE } state; 87290001Sglebius 88290001Sglebius ac = *argc = 0; 89290001Sglebius quote = 0; 
90290001Sglebius if (!str || (p = copy = backslash(str)) == NULL) 91290001Sglebius return 1; 92290001Sglebius 93290001Sglebius /* Initialize vector and state */ 94290001Sglebius clean(); 95290001Sglebius state = STR; 96290001Sglebius buf = (char *)malloc(PARSE_BUFSIZE); 97290001Sglebius token = 0; 98290001Sglebius 99290001Sglebius /* And awaaaaaaaaay we go! */ 100290001Sglebius while (*p) { 101290001Sglebius switch (state) { 102290001Sglebius case STR: 103290001Sglebius if ((*p == '\\') && p[1]) { 104290001Sglebius p++; 105290001Sglebius PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 106290001Sglebius buf[i++] = *p++; 107290001Sglebius } else if (isquote(*p)) { 108290001Sglebius quote = quote ? 0 : *p; 109290001Sglebius ++p; 110290001Sglebius } 111290001Sglebius else if (isspace(*p) && !quote) { 112290001Sglebius state = WHITE; 113290001Sglebius if (i) { 114290001Sglebius buf[i] = '\0'; 115290001Sglebius PARSE_FAIL(insert(&ac, buf)); 116290001Sglebius i = 0; 117290001Sglebius } 118290001Sglebius ++p; 119290001Sglebius } else if (*p == '$') { 120290001Sglebius token = isdelim(*(p + 1)); 121290001Sglebius if (token) 122290001Sglebius p += 2; 123290001Sglebius else 124290001Sglebius ++p; 125290001Sglebius state = VAR; 126290001Sglebius } else { 127290001Sglebius PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 128290001Sglebius buf[i++] = *p++; 129290001Sglebius } 130290001Sglebius break; 131290001Sglebius 132290001Sglebius case WHITE: 133290001Sglebius if (isspace(*p)) 134290001Sglebius ++p; 135290001Sglebius else 136290001Sglebius state = STR; 137290001Sglebius break; 138290001Sglebius 139290001Sglebius case VAR: 140290001Sglebius if (token) { 141290001Sglebius PARSE_FAIL((q = index(p, token)) == NULL); 142290001Sglebius } else { 143290001Sglebius q = p; 144290001Sglebius while (*q && !isspace(*q)) 145290001Sglebius ++q; 146290001Sglebius } 147290001Sglebius tmp = *q; 148290001Sglebius *q = '\0'; 149290001Sglebius if ((val = variable_lookup(p)) != NULL) { 150290001Sglebius int len = 
strlen(val); 151290001Sglebius 152290001Sglebius strncpy(buf + i, val, PARSE_BUFSIZE - (i + 1)); 153290001Sglebius i += min(len, PARSE_BUFSIZE - 1); 154290001Sglebius } 155290001Sglebius *q = tmp; /* restore value */ 156290001Sglebius p = q + (token ? 1 : 0); 157290001Sglebius state = STR; 158290001Sglebius break; 159290001Sglebius } 160290001Sglebius } 161290001Sglebius /* If at end of token, add it */ 162290001Sglebius if (i && state == STR) { 163290001Sglebius buf[i] = '\0'; 164290001Sglebius PARSE_FAIL(insert(&ac, buf)); 165290001Sglebius } 166290001Sglebius args[ac] = NULL; 167290001Sglebius *argc = ac; 168290001Sglebius *argv = (char **)malloc((sizeof(char *) * ac + 1)); 169290001Sglebius bcopy(args, *argv, sizeof(char *) * ac + 1); 170290001Sglebius free(buf); 171290001Sglebius free(copy); 172290001Sglebius return 0; 173290001Sglebius} 174290001Sglebius 175290001Sglebius#define MAXARGS 20 176290001Sglebius 177290001Sglebius/* Clean vector space */ 178290001Sglebiusstatic void 179290001Sglebiusclean(void) 180290001Sglebius{ 181290001Sglebius int i; 182290001Sglebius 183290001Sglebius for (i = 0; i < MAXARGS; i++) { 184290001Sglebius if (args[i] != NULL) { 185290001Sglebius free(args[i]); 186290001Sglebius args[i] = NULL; 187290001Sglebius } 188290001Sglebius } 189290001Sglebius} 190290001Sglebius 191290001Sglebiusstatic int 192290001Sglebiusinsert(int *argcp, char *buf) 193290001Sglebius{ 194290001Sglebius if (*argcp >= MAXARGS) 195290001Sglebius return 1; 196290001Sglebius args[(*argcp)++] = strdup(buf); 197290001Sglebius return 0; 198290001Sglebius} 199290001Sglebius 200290001Sglebiusstatic char * 201290001Sglebiusvariable_lookup(char *name) 202290001Sglebius{ 203290001Sglebius /* XXX search "special variable" space first? */ 204290001Sglebius return (char *)getenv(name); 205290001Sglebius} 206290001Sglebius