1285612Sdelphij/** 2285612Sdelphij * \file cook.c 3181834Sroberto * 4181834Sroberto * This file contains the routines that deal with processing quoted strings 5181834Sroberto * into an internal format. 6285612Sdelphij * 7285612Sdelphij * @addtogroup autoopts 8285612Sdelphij * @{ 9181834Sroberto */ 10181834Sroberto/* 11285612Sdelphij * This file is part of AutoOpts, a companion to AutoGen. 12285612Sdelphij * AutoOpts is free software. 13285612Sdelphij * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved 14181834Sroberto * 15285612Sdelphij * AutoOpts is available under any one of two licenses. The license 16285612Sdelphij * in use must be one of these two and the choice is under the control 17285612Sdelphij * of the user of the license. 18181834Sroberto * 19285612Sdelphij * The GNU Lesser General Public License, version 3 or later 20285612Sdelphij * See the files "COPYING.lgplv3" and "COPYING.gplv3" 21181834Sroberto * 22285612Sdelphij * The Modified Berkeley Software Distribution License 23285612Sdelphij * See the file "COPYING.mbsd" 24181834Sroberto * 25285612Sdelphij * These files have the following sha256 sums: 26181834Sroberto * 27285612Sdelphij * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 28285612Sdelphij * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 29285612Sdelphij * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 30181834Sroberto */ 31181834Sroberto 32181834Sroberto/* = = = START-STATIC-FORWARD = = = */ 33285612Sdelphijstatic bool 34285612Sdelphijcontiguous_quote(char ** pps, char * pq, int * lnct_p); 35181834Sroberto/* = = = END-STATIC-FORWARD = = = */ 36181834Sroberto 37181834Sroberto/*=export_func ao_string_cook_escape_char 38181834Sroberto * private: 39181834Sroberto * 40181834Sroberto * what: escape-process a string fragment 41285612Sdelphij * arg: + char const * + pzScan + points to character after the escape + 42285612Sdelphij * arg: + char * + pRes + Where to put the result byte + 43181834Sroberto * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 44181834Sroberto * 45181834Sroberto * ret-type: unsigned int 46181834Sroberto * ret-desc: The number of bytes consumed processing the escaped character. 47181834Sroberto * 48181834Sroberto * doc: 49181834Sroberto * 50181834Sroberto * This function converts "t" into "\t" and all your other favorite 51181834Sroberto * escapes, including numeric ones: hex and ocatal, too. 52181834Sroberto * The returned result tells the caller how far to advance the 53181834Sroberto * scan pointer (passed in). The default is to just pass through the 54181834Sroberto * escaped character and advance the scan by one. 55181834Sroberto * 56181834Sroberto * Some applications need to keep an escaped newline, others need to 57181834Sroberto * suppress it. This is accomplished by supplying a '\n' replacement 58181834Sroberto * character that is different from \n, if need be. For example, use 59181834Sroberto * 0x7F and never emit a 0x7F. 60181834Sroberto * 61181834Sroberto * err: @code{NULL} is returned if the string is mal-formed. 62181834Sroberto=*/ 63181834Srobertounsigned int 64285612Sdelphijao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 65181834Sroberto{ 66285612Sdelphij unsigned int res = 1; 67181834Sroberto 68181834Sroberto switch (*pRes = *pzIn++) { 69181834Sroberto case NUL: /* NUL - end of input string */ 70181834Sroberto return 0; 71181834Sroberto case '\r': 72285612Sdelphij if (*pzIn != NL) 73181834Sroberto return 1; 74181834Sroberto res++; 75181834Sroberto /* FALLTHROUGH */ 76285612Sdelphij case NL: /* NL - emit newline */ 77181834Sroberto *pRes = (char)nl; 78181834Sroberto return res; 79181834Sroberto 80181834Sroberto case 'a': *pRes = '\a'; break; 81181834Sroberto case 'b': *pRes = '\b'; break; 82181834Sroberto case 'f': *pRes = '\f'; break; 83285612Sdelphij case 'n': *pRes = NL; break; 84181834Sroberto case 'r': *pRes = '\r'; break; 85181834Sroberto case 't': *pRes = '\t'; break; 86181834Sroberto case 'v': *pRes = '\v'; break; 87181834Sroberto 88285612Sdelphij case 'x': 89285612Sdelphij case 'X': /* HEX Escape */ 90285612Sdelphij if (IS_HEX_DIGIT_CHAR(*pzIn)) { 91285612Sdelphij char z[4]; 92285612Sdelphij unsigned int ct = 0; 93181834Sroberto 94285612Sdelphij do { 95285612Sdelphij z[ct] = pzIn[ct]; 96285612Sdelphij if (++ct >= 2) 97285612Sdelphij break; 98285612Sdelphij } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 99285612Sdelphij z[ct] = NUL; 100285612Sdelphij *pRes = (char)strtoul(z, NULL, 16); 101285612Sdelphij return ct + 1; 102285612Sdelphij } 103285612Sdelphij break; 104181834Sroberto 105285612Sdelphij case '0': case '1': case '2': case '3': 106285612Sdelphij case '4': case '5': case '6': case '7': 107285612Sdelphij { 108285612Sdelphij /* 109285612Sdelphij * IF the character copied was an octal digit, 110285612Sdelphij * THEN set the output character to an octal value. 111285612Sdelphij * The 3 octal digit result might exceed 0xFF, so check it. 112285612Sdelphij */ 113285612Sdelphij char z[4]; 114285612Sdelphij unsigned long val; 115285612Sdelphij unsigned int ct = 0; 116181834Sroberto 117285612Sdelphij z[ct++] = *--pzIn; 118285612Sdelphij while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 119285612Sdelphij z[ct] = pzIn[ct]; 120285612Sdelphij if (++ct >= 3) 121181834Sroberto break; 122181834Sroberto } 123181834Sroberto 124285612Sdelphij z[ct] = NUL; 125285612Sdelphij val = strtoul(z, NULL, 8); 126285612Sdelphij if (val > 0xFF) 127285612Sdelphij val = 0xFF; 128285612Sdelphij *pRes = (char)val; 129285612Sdelphij return ct; 130285612Sdelphij } 131285612Sdelphij 132285612Sdelphij default: /* quoted character is result character */; 133285612Sdelphij } 134285612Sdelphij 135285612Sdelphij return res; 136285612Sdelphij} 137285612Sdelphij 138285612Sdelphij 139285612Sdelphij/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 140285612Sdelphij * 141285612Sdelphij * A quoted string has been found. 142285612Sdelphij * Find the end of it and compress any escape sequences. 143285612Sdelphij */ 144285612Sdelphijstatic bool 145285612Sdelphijcontiguous_quote(char ** pps, char * pq, int * lnct_p) 146285612Sdelphij{ 147285612Sdelphij char * ps = *pps + 1; 148285612Sdelphij 149285612Sdelphij for (;;) { 150285612Sdelphij while (IS_WHITESPACE_CHAR(*ps)) 151285612Sdelphij if (*(ps++) == NL) 152285612Sdelphij (*lnct_p)++; 153285612Sdelphij 154181834Sroberto /* 155285612Sdelphij * IF the next character is a quote character, 156285612Sdelphij * THEN we will concatenate the strings. 157181834Sroberto */ 158285612Sdelphij switch (*ps) { 159285612Sdelphij case '"': 160285612Sdelphij case '\'': 161285612Sdelphij *pq = *(ps++); /* assign new quote character and return */ 162285612Sdelphij *pps = ps; 163285612Sdelphij return true; 164181834Sroberto 165285612Sdelphij case '/': 166181834Sroberto /* 167285612Sdelphij * Allow for a comment embedded in the concatenated string. 168181834Sroberto */ 169285612Sdelphij switch (ps[1]) { 170285612Sdelphij default: 171285612Sdelphij *pps = NULL; 172285612Sdelphij return false; 173285612Sdelphij 174285612Sdelphij case '/': 175285612Sdelphij /* 176285612Sdelphij * Skip to end of line 177285612Sdelphij */ 178285612Sdelphij ps = strchr(ps, NL); 179285612Sdelphij if (ps == NULL) { 180285612Sdelphij *pps = NULL; 181285612Sdelphij return false; 182285612Sdelphij } 183181834Sroberto break; 184181834Sroberto 185285612Sdelphij case '*': 186285612Sdelphij { 187285612Sdelphij char * p = strstr( ps+2, "*/" ); 188285612Sdelphij /* 189285612Sdelphij * Skip to terminating star slash 190285612Sdelphij */ 191285612Sdelphij if (p == NULL) { 192285612Sdelphij *pps = NULL; 193285612Sdelphij return false; 194285612Sdelphij } 195181834Sroberto 196285612Sdelphij while (ps < p) { 197285612Sdelphij if (*(ps++) == NL) 198285612Sdelphij (*lnct_p)++; 199285612Sdelphij } 200285612Sdelphij 201285612Sdelphij ps = p + 2; 202181834Sroberto } 203285612Sdelphij } 204285612Sdelphij continue; 205181834Sroberto 206285612Sdelphij default: 207181834Sroberto /* 208285612Sdelphij * The next non-whitespace character is not a quote. 209285612Sdelphij * The series of quoted strings has come to an end. 210181834Sroberto */ 211285612Sdelphij *pps = ps; 212285612Sdelphij return false; 213181834Sroberto } 214181834Sroberto } 215181834Sroberto} 216181834Sroberto 217181834Sroberto/*=export_func ao_string_cook 218181834Sroberto * private: 219181834Sroberto * 220181834Sroberto * what: concatenate and escape-process strings 221285612Sdelphij * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 222285612Sdelphij * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 223181834Sroberto * 224285612Sdelphij * ret-type: char * 225181834Sroberto * ret-desc: The address of the text following the processed strings. 226181834Sroberto * The return value is NULL if the strings are ill-formed. 227181834Sroberto * 228181834Sroberto * doc: 229181834Sroberto * 230181834Sroberto * A series of one or more quoted strings are concatenated together. 231181834Sroberto * If they are quoted with double quotes (@code{"}), then backslash 232181834Sroberto * escapes are processed per the C programming language. If they are 233181834Sroberto * single quote strings, then the backslashes are honored only when they 234181834Sroberto * precede another backslash or a single quote character. 235181834Sroberto * 236181834Sroberto * err: @code{NULL} is returned if the string(s) is/are mal-formed. 237181834Sroberto=*/ 238285612Sdelphijchar * 239285612Sdelphijao_string_cook(char * pzScan, int * lnct_p) 240181834Sroberto{ 241181834Sroberto int l = 0; 242181834Sroberto char q = *pzScan; 243181834Sroberto 244181834Sroberto /* 245181834Sroberto * It is a quoted string. Process the escape sequence characters 246181834Sroberto * (in the set "abfnrtv") and make sure we find a closing quote. 247181834Sroberto */ 248285612Sdelphij char * pzD = pzScan++; 249285612Sdelphij char * pzS = pzScan; 250181834Sroberto 251285612Sdelphij if (lnct_p == NULL) 252285612Sdelphij lnct_p = &l; 253181834Sroberto 254181834Sroberto for (;;) { 255181834Sroberto /* 256181834Sroberto * IF the next character is the quote character, THEN we may end the 257181834Sroberto * string. We end it unless the next non-blank character *after* the 258181834Sroberto * string happens to also be a quote. If it is, then we will change 259181834Sroberto * our quote character to the new quote character and continue 260181834Sroberto * condensing text. 261181834Sroberto */ 262181834Sroberto while (*pzS == q) { 263181834Sroberto *pzD = NUL; /* This is probably the end of the line */ 264285612Sdelphij if (! contiguous_quote(&pzS, &q, lnct_p)) 265181834Sroberto return pzS; 266181834Sroberto } 267181834Sroberto 268181834Sroberto /* 269181834Sroberto * We are inside a quoted string. Copy text. 270181834Sroberto */ 271181834Sroberto switch (*(pzD++) = *(pzS++)) { 272181834Sroberto case NUL: 273181834Sroberto return NULL; 274181834Sroberto 275285612Sdelphij case NL: 276285612Sdelphij (*lnct_p)++; 277181834Sroberto break; 278181834Sroberto 279181834Sroberto case '\\': 280181834Sroberto /* 281181834Sroberto * IF we are escaping a new line, 282181834Sroberto * THEN drop both the escape and the newline from 283181834Sroberto * the result string. 284181834Sroberto */ 285285612Sdelphij if (*pzS == NL) { 286181834Sroberto pzS++; 287181834Sroberto pzD--; 288285612Sdelphij (*lnct_p)++; 289181834Sroberto } 290181834Sroberto 291181834Sroberto /* 292181834Sroberto * ELSE IF the quote character is '"' or '`', 293181834Sroberto * THEN we do the full escape character processing 294181834Sroberto */ 295181834Sroberto else if (q != '\'') { 296285612Sdelphij unsigned int ct; 297285612Sdelphij ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 298181834Sroberto if (ct == 0) 299181834Sroberto return NULL; 300181834Sroberto 301181834Sroberto pzS += ct; 302181834Sroberto } /* if (q != '\'') */ 303181834Sroberto 304181834Sroberto /* 305181834Sroberto * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 306181834Sroberto * The latter only to easily hide preprocessing directives. 307181834Sroberto */ 308181834Sroberto else switch (*pzS) { 309181834Sroberto case '\\': 310181834Sroberto case '\'': 311181834Sroberto case '#': 312181834Sroberto pzD[-1] = *pzS++; 313181834Sroberto } 314181834Sroberto } /* switch (*(pzD++) = *(pzS++)) */ 315181834Sroberto } /* for (;;) */ 316181834Sroberto} 317285612Sdelphij 318285612Sdelphij/** @} 319285612Sdelphij * 320181834Sroberto * Local Variables: 321181834Sroberto * mode: C 322181834Sroberto * c-file-style: "stroustrup" 323181834Sroberto * indent-tabs-mode: nil 324181834Sroberto * End: 325181834Sroberto * end of autoopts/cook.c */ 326