/* cook.c revision 181834 */

/*
 *  $Id: cook.c,v 4.10 2007/02/04 17:44:12 bkorb Exp $
 *  Time-stamp:      "2006-09-24 15:21:02 bkorb"
 *
 *  This file contains the routines that deal with processing quoted strings
 *  into an internal format.
 */

/*
 *  Automated Options copyright 1992-2007 Bruce Korb
 *
 *  Automated Options is free software.
 *  You may redistribute it and/or modify it under the terms of the
 *  GNU General Public License, as published by the Free Software
 *  Foundation; either version 2, or (at your option) any later version.
 *
 *  Automated Options is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Automated Options.  See the file "COPYING".  If not,
 *  write to:  The Free Software Foundation, Inc.,
 *             51 Franklin Street, Fifth Floor,
 *             Boston, MA  02110-1301, USA.
 *
 * As a special exception, Bruce Korb gives permission for additional
 * uses of the text contained in his release of AutoOpts.
 *
 * The exception is that, if you link the AutoOpts library with other
 * files to produce an executable, this does not by itself cause the
 * resulting executable to be covered by the GNU General Public License.
 * Your use of that executable is in no way restricted on account of
 * linking the AutoOpts library code into it.
 *
 * This exception does not however invalidate any other reasons why
 * the executable file might be covered by the GNU General Public License.
 *
 * This exception applies only to the code released by Bruce Korb under
 * the name AutoOpts.  If you copy code from other sources under the
 * General Public License into a copy of AutoOpts, as the General Public
 * License permits, the exception does not apply to the code that you add
 * in this way.  To avoid misleading anyone as to the status of such
 * modified files, you must delete this exception notice from them.
 *
 * If you write modifications of your own for AutoOpts, it is your choice
 * whether to permit this exception to apply to your modifications.
 * If you do not wish that, delete this exception notice.
 */

/*
 *  NUL is normally supplied by the headers this file is built with.
 *  Provide the same spelling when it is missing so that this file does
 *  not depend on it.
 */
#ifndef NUL
#  define NUL '\0'
#endif

/* = = = START-STATIC-FORWARD = = = */
/* static forward declarations maintained by :mkfwd */
/* = = = END-STATIC-FORWARD = = = */

/*=export_func  ao_string_cook_escape_char
 * private:
 *
 * what:  escape-process a string fragment
 * arg:   + char const*  + pzScan  + points to character after the escape +
 * arg:   + char*        + pRes    + Where to put the result byte +
 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
 *
 * ret-type: unsigned int
 * ret-desc: The number of bytes consumed processing the escaped character.
 *
 * doc:
 *
 *  This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones:  hex and octal, too.
 *  The returned result tells the caller how far to advance the
 *  scan pointer (passed in).  The default is to just pass through the
 *  escaped character and advance the scan by one.
 *
 *  A hex escape consumes the "x" plus at most two hex digits.  An "x"
 *  that is not followed by a hex digit is passed through unchanged.
 *  An octal escape consumes at most three octal digits; the third digit
 *  is only consumed when the resulting value still fits in one byte.
 *
 *  Some applications need to keep an escaped newline, others need to
 *  suppress it.  This is accomplished by supplying a '\n' replacement
 *  character that is different from \n, if need be.  For example, use
 *  0x7F and never emit a 0x7F.  A carriage return immediately followed
 *  by a newline is treated as one newline and consumes two bytes.
 *
 * err:  Zero is returned if the escaped character is the NUL byte,
 *       i.e. the input string ended right after the escape character.
=*/
unsigned int
ao_string_cook_escape_char( char const* pzIn, char* pRes, unsigned int nl )
{
    unsigned int  res = 1;

    /*
     *  Default result:  pass the escaped character through as is.
     *  From here on, "pzIn" points at the byte *after* that character.
     */
    switch (*pRes = *pzIn++) {
    case NUL:         /* NUL - end of input string */
        return 0;

    case '\r':
        if (*pzIn != '\n')
            return 1;
        res++;        /* CR-LF pair:  consume the line feed, too */
        /* FALLTHROUGH */
    case '\n':        /* NL  - emit newline        */
        *pRes = (char)nl;
        return res;

    case 'a': *pRes = '\a'; break;
    case 'b': *pRes = '\b'; break;
    case 'f': *pRes = '\f'; break;
    case 'n': *pRes = '\n'; break;
    case 'r': *pRes = '\r'; break;
    case 't': *pRes = '\t'; break;
    case 'v': *pRes = '\v'; break;

    case 'x':         /* HEX Escape       */
    {
        unsigned int  val = 0;
        unsigned int  ct  = 0;

        /*
         *  Accumulate up to two hex digits.  The <ctype.h> argument must
         *  be representable as "unsigned char", so the byte is converted
         *  before the call.  The loop stops at NUL because NUL is not a
         *  hex digit.
         */
        while ((ct < 2) && isxdigit( (unsigned char)pzIn[ct] )) {
            unsigned char  ch = (unsigned char)pzIn[ct++];
            unsigned int   digit;

            if ((ch >= 'A') && (ch <= 'F'))
                digit = 10U + (unsigned int)(ch - 'A');
            else if ((ch >= 'a') && (ch <= 'f'))
                digit = 10U + (unsigned int)(ch - 'a');
            else
                digit = (unsigned int)(ch - '0');

            val = (val << 4) + digit;
        }

        /*
         *  With no hex digit at all, the 'x' already stored is the result.
         */
        if (ct > 0) {
            *pRes = (char)val;
            res   = ct + 1;   /* the 'x' plus the digits */
        }
        break;
    }

    default:
        /*
         *  IF the character copied was an octal digit,
         *  THEN set the output character to an octal value
         */
        if ((*pRes >= '0') && (*pRes <= '7')) {
            unsigned int   val = (unsigned int)(*pRes - '0');
            unsigned char  ch  = (unsigned char)pzIn[0];

            /*
             *  The second byte is only examined further (and the third
             *  byte only read) when it is an octal digit, so the scan
             *  never looks beyond the terminating NUL.
             */
            if ((ch >= '0') && (ch <= '7')) {
                val = (val << 3) + (unsigned int)(ch - '0');
                res = 2;
                ch  = (unsigned char)pzIn[1];

                /*
                 *  IF the third character is an octal digit and the new
                 *  value would not be too large for one byte,
                 *  THEN add on the third and last character value
                 */
                if (  (ch >= '0') && (ch <= '7')
                   && ((val << 3) < 0xFF)) {
                    val = (val << 3) + (unsigned int)(ch - '0');
                    res = 3;
                }
            }

            *pRes = (char)val;
        }
        break;
    }

    return res;
}


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *
 *  A quoted string has been found.
 *  Find the end of it and compress any escape sequences.
 */
/*=export_func  ao_string_cook
 * private:
 *
 * what:  concatenate and escape-process strings
 * arg:   + char* + pzScan     + The *MODIFIABLE* input buffer +
 * arg:   + int*  + pLineCt    + The (possibly NULL) pointer to a line count +
 *
 * ret-type: char*
 * ret-desc: The address of the text following the processed strings.
 *           The return value is NULL if the strings are ill-formed.
 *
 * doc:
 *
 *  A series of one or more quoted strings are concatenated together.
 *  If they are quoted with double quotes (@code{"}), then backslash
 *  escapes are processed per the C programming language.  If they are
 *  single quote strings, then the backslashes are honored only when they
 *  precede another backslash or a single quote character.
 *
 *  The first byte of the buffer is taken to be the opening quote
 *  character.  The cooked, NUL terminated result is written over the
 *  input, starting at that first byte.  White space and C or C++ style
 *  comments are allowed between the strings being concatenated.  Every
 *  newline that is scanned over is added to the line count exactly once.
 *
 * err:  @code{NULL} is returned if the string(s) is/are mal-formed:
 *       the buffer is empty, a string or a comment is not terminated,
 *       or a lone slash follows a string.
=*/
char*
ao_string_cook( char* pzScan, int* pLineCt )
{
    int   l = 0;
    char  q = *pzScan;

    /*
     *  It is a quoted string.  Process the escape sequence characters
     *  (in the set "abfnrtv") and make sure we find a closing quote.
     *  "pzD" is the output (destination) pointer and "pzS" is the
     *  input (source) pointer.  Output never gets ahead of input.
     */
    char* pzD = pzScan++;
    char* pzS = pzScan;

    /*
     *  An empty buffer has no opening quote.  Scanning would start
     *  beyond the terminating NUL, so call it ill-formed right away.
     */
    if (q == NUL)
        return NULL;

    if (pLineCt == NULL)
        pLineCt = &l;

    for (;;) {
        /*
         *  IF the next character is the quote character, THEN we may end the
         *  string.  We end it unless the next non-blank character *after* the
         *  string happens to also be a quote.  If it is, then we will change
         *  our quote character to the new quote character and continue
         *  condensing text.
         */
        while (*pzS == q) {
            *pzD = NUL; /* This is probably the end of the line */
            pzS++;

        scan_for_quote:
            while (isspace( (unsigned char)*pzS ))
                if (*(pzS++) == '\n')
                    (*pLineCt)++;

            /*
             *  IF the next character is a quote character,
             *  THEN we will concatenate the strings.
             */
            switch (*pzS) {
            case '"':
            case '\'':
                break;

            case '/':
                /*
                 *  Allow for a comment embedded in the concatenated string.
                 */
                switch (pzS[1]) {
                default:  return NULL;
                case '/':
                    /*
                     *  Skip to end of line.  The scan is left pointing
                     *  *at* the newline, so the white space loop at
                     *  "scan_for_quote" consumes and counts it.
                     *  Counting it here as well would count it twice.
                     */
                    pzS = strchr( pzS, '\n' );
                    if (pzS == NULL)
                        return NULL;
                    break;

                case '*':
                {
                    char* p = strstr( pzS+2, "*/" );
                    /*
                     *  Skip to terminating star slash
                     */
                    if (p == NULL)
                        return NULL;
                    while (pzS < p) {
                        if (*(pzS++) == '\n')
                            (*pLineCt)++;
                    }

                    pzS = p + 2;
                }
                }
                goto scan_for_quote;

            default:
                /*
                 *  The next non-whitespace character is not a quote.
                 *  The series of quoted strings has come to an end.
                 */
                return pzS;
            }

            q = *(pzS++);  /* assign new quote character and advance scan */
        }

        /*
         *  We are inside a quoted string.  Copy text.
         */
        switch (*(pzD++) = *(pzS++)) {
        case NUL:
            /*
             *  The input ended before the closing quote was found.
             */
            return NULL;

        case '\n':
            (*pLineCt)++;
            break;

        case '\\':
            /*
             *  IF we are escaping a new line,
             *  THEN drop both the escape and the newline from
             *       the result string.
             */
            if (*pzS == '\n') {
                pzS++;
                pzD--;
                (*pLineCt)++;
            }

            /*
             *  ELSE IF the quote character is '"' or '`',
             *  THEN we do the full escape character processing.
             *  The result byte replaces the backslash just copied.
             */
            else if (q != '\'') {
                unsigned int ct =
                    ao_string_cook_escape_char( pzS, pzD-1,
                                                (unsigned int)'\n' );
                if (ct == 0)
                    return NULL;

                pzS += ct;
            }     /* if (q != '\'')                  */

            /*
             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
             *  The latter only to easily hide preprocessing directives.
             *  Any other backslash is kept as an ordinary character.
             */
            else switch (*pzS) {
            case '\\':
            case '\'':
            case '#':
                pzD[-1] = *pzS++;
                break;

            default:
                break;
            }
            break;

        default:
            break;
        }     /* switch (*(pzD++) = *(pzS++))    */
    }         /* for (;;)                        */
}
/*
 * Local Variables:
 * mode: C
 * c-file-style: "stroustrup"
 * indent-tabs-mode: nil
 * End:
 * end of autoopts/cook.c */