1280849Scy/** 2280849Scy * \file cook.c 3181834Sroberto * 4181834Sroberto * This file contains the routines that deal with processing quoted strings 5181834Sroberto * into an internal format. 6280849Scy * 7280849Scy * @addtogroup autoopts 8280849Scy * @{ 9181834Sroberto */ 10181834Sroberto/* 11280849Scy * This file is part of AutoOpts, a companion to AutoGen. 12280849Scy * AutoOpts is free software. 13285169Scy * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved 14181834Sroberto * 15280849Scy * AutoOpts is available under any one of two licenses. The license 16280849Scy * in use must be one of these two and the choice is under the control 17280849Scy * of the user of the license. 18181834Sroberto * 19280849Scy * The GNU Lesser General Public License, version 3 or later 20280849Scy * See the files "COPYING.lgplv3" and "COPYING.gplv3" 21181834Sroberto * 22280849Scy * The Modified Berkeley Software Distribution License 23280849Scy * See the file "COPYING.mbsd" 24181834Sroberto * 25280849Scy * These files have the following sha256 sums: 26181834Sroberto * 27280849Scy * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 28280849Scy * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 29280849Scy * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 30181834Sroberto */ 31181834Sroberto 32181834Sroberto/* = = = START-STATIC-FORWARD = = = */ 33280849Scystatic bool 34280849Scycontiguous_quote(char ** pps, char * pq, int * lnct_p); 35181834Sroberto/* = = = END-STATIC-FORWARD = = = */ 36181834Sroberto 37181834Sroberto/*=export_func ao_string_cook_escape_char 38181834Sroberto * private: 39181834Sroberto * 40181834Sroberto * what: escape-process a string fragment 41285169Scy * arg: + char const * + pzScan + points to character after the escape + 42285169Scy * arg: + char * + pRes + Where to put the result byte + 43181834Sroberto * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 44181834Sroberto * 45181834Sroberto * ret-type: unsigned int 46181834Sroberto * ret-desc: The number of bytes consumed processing the escaped character. 47181834Sroberto * 48181834Sroberto * doc: 49181834Sroberto * 50181834Sroberto * This function converts "t" into "\t" and all your other favorite 51181834Sroberto * escapes, including numeric ones: hex and ocatal, too. 52181834Sroberto * The returned result tells the caller how far to advance the 53181834Sroberto * scan pointer (passed in). The default is to just pass through the 54181834Sroberto * escaped character and advance the scan by one. 55181834Sroberto * 56181834Sroberto * Some applications need to keep an escaped newline, others need to 57181834Sroberto * suppress it. This is accomplished by supplying a '\n' replacement 58181834Sroberto * character that is different from \n, if need be. For example, use 59181834Sroberto * 0x7F and never emit a 0x7F. 60181834Sroberto * 61181834Sroberto * err: @code{NULL} is returned if the string is mal-formed. 62181834Sroberto=*/ 63181834Srobertounsigned int 64280849Scyao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 65181834Sroberto{ 66280849Scy unsigned int res = 1; 67181834Sroberto 68181834Sroberto switch (*pRes = *pzIn++) { 69181834Sroberto case NUL: /* NUL - end of input string */ 70181834Sroberto return 0; 71181834Sroberto case '\r': 72280849Scy if (*pzIn != NL) 73181834Sroberto return 1; 74181834Sroberto res++; 75181834Sroberto /* FALLTHROUGH */ 76280849Scy case NL: /* NL - emit newline */ 77181834Sroberto *pRes = (char)nl; 78181834Sroberto return res; 79181834Sroberto 80181834Sroberto case 'a': *pRes = '\a'; break; 81181834Sroberto case 'b': *pRes = '\b'; break; 82181834Sroberto case 'f': *pRes = '\f'; break; 83280849Scy case 'n': *pRes = NL; break; 84181834Sroberto case 'r': *pRes = '\r'; break; 85181834Sroberto case 't': *pRes = '\t'; break; 86181834Sroberto case 'v': *pRes = '\v'; break; 87181834Sroberto 88280849Scy case 'x': 89280849Scy case 'X': /* HEX Escape */ 90280849Scy if (IS_HEX_DIGIT_CHAR(*pzIn)) { 91280849Scy char z[4]; 92280849Scy unsigned int ct = 0; 93181834Sroberto 94280849Scy do { 95280849Scy z[ct] = pzIn[ct]; 96280849Scy if (++ct >= 2) 97280849Scy break; 98280849Scy } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 99280849Scy z[ct] = NUL; 100280849Scy *pRes = (char)strtoul(z, NULL, 16); 101280849Scy return ct + 1; 102280849Scy } 103280849Scy break; 104181834Sroberto 105280849Scy case '0': case '1': case '2': case '3': 106280849Scy case '4': case '5': case '6': case '7': 107280849Scy { 108280849Scy /* 109280849Scy * IF the character copied was an octal digit, 110280849Scy * THEN set the output character to an octal value. 111280849Scy * The 3 octal digit result might exceed 0xFF, so check it. 112280849Scy */ 113280849Scy char z[4]; 114280849Scy unsigned long val; 115280849Scy unsigned int ct = 0; 116181834Sroberto 117280849Scy z[ct++] = *--pzIn; 118280849Scy while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 119280849Scy z[ct] = pzIn[ct]; 120280849Scy if (++ct >= 3) 121181834Sroberto break; 122181834Sroberto } 123181834Sroberto 124280849Scy z[ct] = NUL; 125280849Scy val = strtoul(z, NULL, 8); 126280849Scy if (val > 0xFF) 127280849Scy val = 0xFF; 128280849Scy *pRes = (char)val; 129280849Scy return ct; 130280849Scy } 131280849Scy 132280849Scy default: /* quoted character is result character */; 133280849Scy } 134280849Scy 135280849Scy return res; 136280849Scy} 137280849Scy 138280849Scy 139280849Scy/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 140280849Scy * 141280849Scy * A quoted string has been found. 142280849Scy * Find the end of it and compress any escape sequences. 143280849Scy */ 144280849Scystatic bool 145280849Scycontiguous_quote(char ** pps, char * pq, int * lnct_p) 146280849Scy{ 147280849Scy char * ps = *pps + 1; 148280849Scy 149280849Scy for (;;) { 150280849Scy while (IS_WHITESPACE_CHAR(*ps)) 151280849Scy if (*(ps++) == NL) 152280849Scy (*lnct_p)++; 153280849Scy 154181834Sroberto /* 155280849Scy * IF the next character is a quote character, 156280849Scy * THEN we will concatenate the strings. 157181834Sroberto */ 158280849Scy switch (*ps) { 159280849Scy case '"': 160280849Scy case '\'': 161280849Scy *pq = *(ps++); /* assign new quote character and return */ 162280849Scy *pps = ps; 163280849Scy return true; 164181834Sroberto 165280849Scy case '/': 166181834Sroberto /* 167280849Scy * Allow for a comment embedded in the concatenated string. 168181834Sroberto */ 169280849Scy switch (ps[1]) { 170280849Scy default: 171280849Scy *pps = NULL; 172280849Scy return false; 173280849Scy 174280849Scy case '/': 175280849Scy /* 176280849Scy * Skip to end of line 177280849Scy */ 178280849Scy ps = strchr(ps, NL); 179280849Scy if (ps == NULL) { 180280849Scy *pps = NULL; 181280849Scy return false; 182280849Scy } 183181834Sroberto break; 184181834Sroberto 185280849Scy case '*': 186280849Scy { 187285169Scy char * p = strstr( ps+2, "*/" ); 188280849Scy /* 189280849Scy * Skip to terminating star slash 190280849Scy */ 191280849Scy if (p == NULL) { 192280849Scy *pps = NULL; 193280849Scy return false; 194280849Scy } 195181834Sroberto 196280849Scy while (ps < p) { 197280849Scy if (*(ps++) == NL) 198280849Scy (*lnct_p)++; 199280849Scy } 200280849Scy 201280849Scy ps = p + 2; 202181834Sroberto } 203280849Scy } 204280849Scy continue; 205181834Sroberto 206280849Scy default: 207181834Sroberto /* 208280849Scy * The next non-whitespace character is not a quote. 209280849Scy * The series of quoted strings has come to an end. 210181834Sroberto */ 211280849Scy *pps = ps; 212280849Scy return false; 213181834Sroberto } 214181834Sroberto } 215181834Sroberto} 216181834Sroberto 217181834Sroberto/*=export_func ao_string_cook 218181834Sroberto * private: 219181834Sroberto * 220181834Sroberto * what: concatenate and escape-process strings 221285169Scy * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 222285169Scy * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 223181834Sroberto * 224285169Scy * ret-type: char * 225181834Sroberto * ret-desc: The address of the text following the processed strings. 226181834Sroberto * The return value is NULL if the strings are ill-formed. 227181834Sroberto * 228181834Sroberto * doc: 229181834Sroberto * 230181834Sroberto * A series of one or more quoted strings are concatenated together. 231181834Sroberto * If they are quoted with double quotes (@code{"}), then backslash 232181834Sroberto * escapes are processed per the C programming language. If they are 233181834Sroberto * single quote strings, then the backslashes are honored only when they 234181834Sroberto * precede another backslash or a single quote character. 235181834Sroberto * 236181834Sroberto * err: @code{NULL} is returned if the string(s) is/are mal-formed. 237181834Sroberto=*/ 238280849Scychar * 239280849Scyao_string_cook(char * pzScan, int * lnct_p) 240181834Sroberto{ 241181834Sroberto int l = 0; 242181834Sroberto char q = *pzScan; 243181834Sroberto 244181834Sroberto /* 245181834Sroberto * It is a quoted string. Process the escape sequence characters 246181834Sroberto * (in the set "abfnrtv") and make sure we find a closing quote. 247181834Sroberto */ 248285169Scy char * pzD = pzScan++; 249285169Scy char * pzS = pzScan; 250181834Sroberto 251280849Scy if (lnct_p == NULL) 252280849Scy lnct_p = &l; 253181834Sroberto 254181834Sroberto for (;;) { 255181834Sroberto /* 256181834Sroberto * IF the next character is the quote character, THEN we may end the 257181834Sroberto * string. We end it unless the next non-blank character *after* the 258181834Sroberto * string happens to also be a quote. If it is, then we will change 259181834Sroberto * our quote character to the new quote character and continue 260181834Sroberto * condensing text. 261181834Sroberto */ 262181834Sroberto while (*pzS == q) { 263181834Sroberto *pzD = NUL; /* This is probably the end of the line */ 264280849Scy if (! contiguous_quote(&pzS, &q, lnct_p)) 265181834Sroberto return pzS; 266181834Sroberto } 267181834Sroberto 268181834Sroberto /* 269181834Sroberto * We are inside a quoted string. Copy text. 270181834Sroberto */ 271181834Sroberto switch (*(pzD++) = *(pzS++)) { 272181834Sroberto case NUL: 273181834Sroberto return NULL; 274181834Sroberto 275280849Scy case NL: 276280849Scy (*lnct_p)++; 277181834Sroberto break; 278181834Sroberto 279181834Sroberto case '\\': 280181834Sroberto /* 281181834Sroberto * IF we are escaping a new line, 282181834Sroberto * THEN drop both the escape and the newline from 283181834Sroberto * the result string. 284181834Sroberto */ 285280849Scy if (*pzS == NL) { 286181834Sroberto pzS++; 287181834Sroberto pzD--; 288280849Scy (*lnct_p)++; 289181834Sroberto } 290181834Sroberto 291181834Sroberto /* 292181834Sroberto * ELSE IF the quote character is '"' or '`', 293181834Sroberto * THEN we do the full escape character processing 294181834Sroberto */ 295181834Sroberto else if (q != '\'') { 296280849Scy unsigned int ct; 297280849Scy ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 298181834Sroberto if (ct == 0) 299181834Sroberto return NULL; 300181834Sroberto 301181834Sroberto pzS += ct; 302181834Sroberto } /* if (q != '\'') */ 303181834Sroberto 304181834Sroberto /* 305181834Sroberto * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 306181834Sroberto * The latter only to easily hide preprocessing directives. 307181834Sroberto */ 308181834Sroberto else switch (*pzS) { 309181834Sroberto case '\\': 310181834Sroberto case '\'': 311181834Sroberto case '#': 312181834Sroberto pzD[-1] = *pzS++; 313181834Sroberto } 314181834Sroberto } /* switch (*(pzD++) = *(pzS++)) */ 315181834Sroberto } /* for (;;) */ 316181834Sroberto} 317280849Scy 318280849Scy/** @} 319280849Scy * 320181834Sroberto * Local Variables: 321181834Sroberto * mode: C 322181834Sroberto * c-file-style: "stroustrup" 323181834Sroberto * indent-tabs-mode: nil 324181834Sroberto * End: 325181834Sroberto * end of autoopts/cook.c */ 326