cook.c revision 181834
1
2/*
3 *  $Id: cook.c,v 4.10 2007/02/04 17:44:12 bkorb Exp $
4 *  Time-stamp:      "2006-09-24 15:21:02 bkorb"
5 *
6 *  This file contains the routines that deal with processing quoted strings
7 *  into an internal format.
8 */
9
10/*
11 *  Automated Options copyright 1992-2007 Bruce Korb
12 *
13 *  Automated Options is free software.
14 *  You may redistribute it and/or modify it under the terms of the
15 *  GNU General Public License, as published by the Free Software
16 *  Foundation; either version 2, or (at your option) any later version.
17 *
18 *  Automated Options is distributed in the hope that it will be useful,
19 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 *  GNU General Public License for more details.
22 *
23 *  You should have received a copy of the GNU General Public License
24 *  along with Automated Options.  See the file "COPYING".  If not,
25 *  write to:  The Free Software Foundation, Inc.,
26 *             51 Franklin Street, Fifth Floor,
27 *             Boston, MA  02110-1301, USA.
28 *
29 * As a special exception, Bruce Korb gives permission for additional
30 * uses of the text contained in his release of AutoOpts.
31 *
32 * The exception is that, if you link the AutoOpts library with other
33 * files to produce an executable, this does not by itself cause the
34 * resulting executable to be covered by the GNU General Public License.
35 * Your use of that executable is in no way restricted on account of
36 * linking the AutoOpts library code into it.
37 *
38 * This exception does not however invalidate any other reasons why
39 * the executable file might be covered by the GNU General Public License.
40 *
41 * This exception applies only to the code released by Bruce Korb under
42 * the name AutoOpts.  If you copy code from other sources under the
43 * General Public License into a copy of AutoOpts, as the General Public
44 * License permits, the exception does not apply to the code that you add
45 * in this way.  To avoid misleading anyone as to the status of such
46 * modified files, you must delete this exception notice from them.
47 *
48 * If you write modifications of your own for AutoOpts, it is your choice
49 * whether to permit this exception to apply to your modifications.
50 * If you do not wish that, delete this exception notice.
51 */
52
53/* = = = START-STATIC-FORWARD = = = */
54/* static forward declarations maintained by :mkfwd */
55/* = = = END-STATIC-FORWARD = = = */
56
57/*=export_func  ao_string_cook_escape_char
58 * private:
59 *
60 * what:  escape-process a string fragment
61 * arg:   + char const*  + pzScan  + points to character after the escape +
62 * arg:   + char*        + pRes    + Where to put the result byte +
63 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
64 *
65 * ret-type: unsigned int
66 * ret-desc: The number of bytes consumed processing the escaped character.
67 *
68 * doc:
69 *
70 *  This function converts "t" into "\t" and all your other favorite
71 *  escapes, including numeric ones:  hex and octal, too.
72 *  The returned result tells the caller how far to advance the
73 *  scan pointer (passed in).  The default is to just pass through the
74 *  escaped character and advance the scan by one.
75 *
76 *  Some applications need to keep an escaped newline, others need to
77 *  suppress it.  This is accomplished by supplying a '\n' replacement
78 *  character that is different from \n, if need be.  For example, use
79 *  0x7F and never emit a 0x7F.
80 *
81 * err:  @code{0} is returned if the input ends (NUL) right after the escape.
82=*/
83unsigned int
84ao_string_cook_escape_char( char const* pzIn, char* pRes, u_int nl )
85{
86    unsigned int  res = 1;
87
88    switch (*pRes = *pzIn++) {
89    case NUL:         /* NUL - end of input string */
90        return 0;
91    case '\r':
92        if (*pzIn != '\n')
93            return 1;
94        res++;
95        /* FALLTHROUGH */
96    case '\n':        /* NL  - emit newline        */
97        *pRes = (char)nl;
98        return res;
99
100    case 'a': *pRes = '\a'; break;
101    case 'b': *pRes = '\b'; break;
102    case 'f': *pRes = '\f'; break;
103    case 'n': *pRes = '\n'; break;
104    case 'r': *pRes = '\r'; break;
105    case 't': *pRes = '\t'; break;
106    case 'v': *pRes = '\v'; break;
107
108    case 'x':         /* HEX Escape       */
109        if (isxdigit( (int)*pzIn ))  {
110            unsigned int  val;
111            unsigned char ch = *pzIn++;
112
113            if ((ch >= 'A') && (ch <= 'F'))
114                val = 10 + (ch - 'A');
115            else if ((ch >= 'a') && (ch <= 'f'))
116                val = 10 + (ch - 'a');
117            else val = ch - '0';
118
119            ch = *pzIn;
120
121            if (! isxdigit( ch )) {
122                *pRes = val;
123                res   = 2;
124                break;
125            }
126            val <<= 4;
127            if ((ch >= 'A') && (ch <= 'F'))
128                val += 10 + (ch - 'A');
129            else if ((ch >= 'a') && (ch <= 'f'))
130                val += 10 + (ch - 'a');
131            else val += ch - '0';
132
133            res = 3;
134            *pRes = val;
135        }
136        break;
137
138    default:
139        /*
140         *  IF the character copied was an octal digit,
141         *  THEN set the output character to an octal value
142         */
143        if (isdigit( (int)*pRes ) && (*pRes < '8'))  {
144            unsigned int  val = *pRes - '0';
145            unsigned char ch  = *pzIn++;
146
147            /*
148             *  IF the second character is *not* an octal digit,
149             *  THEN save the value and bail
150             */
151            if ((ch < '0') || (ch > '7')) {
152                *pRes = val;
153                break;
154            }
155
156            val = (val<<3) + (ch - '0');
157            ch  = *pzIn;
158            res = 2;
159
160            /*
161             *  IF the THIRD character is *not* an octal digit,
162             *  THEN save the value and bail
163             */
164            if ((ch < '0') || (ch > '7')) {
165                *pRes = val;
166                break;
167            }
168
169            /*
170             *  IF the new value would not be too large,
171             *  THEN add on the third and last character value
172             */
173            if ((val<<3) < 0xFF) {
174                val = (val<<3) + (ch - '0');
175                res = 3;
176            }
177
178            *pRes = val;
179            break;
180        }
181    }
182
183    return res;
184}
185
186
187/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
188 *
189 *  A quoted string has been found.
190 *  Find the end of it and compress any escape sequences.
191 */
192/*=export_func  ao_string_cook
193 * private:
194 *
195 * what:  concatenate and escape-process strings
196 * arg:   + char* + pzScan     + The *MODIFIABLE* input buffer +
197 * arg:   + int*  + pLineCt    + The (possibly NULL) pointer to a line count +
198 *
199 * ret-type: char*
200 * ret-desc: The address of the text following the processed strings.
201 *           The return value is NULL if the strings are ill-formed.
202 *
203 * doc:
204 *
205 *  A series of one or more quoted strings are concatenated together.
206 *  If they are quoted with double quotes (@code{"}), then backslash
207 *  escapes are processed per the C programming language.  If they are
208 *  single quote strings, then the backslashes are honored only when they
209 *  precede another backslash or a single quote character.
210 *
211 * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
212=*/
/*
 *  Cook a run of adjacent quoted strings in place.
 *
 *  pzScan must point at the opening quote character.  The cooked text
 *  is written back starting at that same address and is NUL terminated
 *  there.  Strings separated only by white space and C or C++ style
 *  comments are concatenated; each string may use its own quote character.
 *
 *  pLineCt, when not NULL, is incremented once for every newline that
 *  is scanned (inside strings, in white space and in comments).
 *
 *  Returns the address of the first non-white-space character following
 *  the last string.  Returns NULL when the input is malformed:  the input
 *  ends inside a string or right after a backslash, a comment between
 *  strings is not terminated, or a '/' between strings does not start
 *  a comment.
 */
char*
ao_string_cook( char* pzScan, int* pLineCt )
{
    int   l = 0;
    char  q = *pzScan;

    /*
     *  The result overwrites the input, beginning at the opening quote.
     *  The write pointer can never pass the read pointer.
     */
    char* pzD = pzScan++;
    char* pzS = pzScan;

    if (pLineCt == NULL)
        pLineCt = &l;

    for (;;) {
        /*
         *  IF the next character is the quote character, THEN we may end the
         *  string.  We end it unless the next non-blank character *after* the
         *  string happens to also be a quote.  If it is, then we will change
         *  our quote character to the new quote character and continue
         *  condensing text.
         */
        while (*pzS == q) {
            *pzD = '\0'; /* This is probably the end of the text */
            pzS++;

            /*
             *  Step over any mix of white space and comments.
             */
            for (;;) {
                while (isspace( (unsigned char)*pzS )) {
                    if (*(pzS++) == '\n')
                        (*pLineCt)++;
                }

                if (*pzS != '/')
                    break;

                if (pzS[1] == '/') {
                    /*
                     *  Skip to end of line.  The newline itself is left
                     *  for the white space scan above, which counts it.
                     *  Counting it here as well would count it twice.
                     */
                    pzS = strchr( pzS, '\n' );
                    if (pzS == NULL)
                        return NULL;
                }

                else if (pzS[1] == '*') {
                    /*
                     *  Skip to terminating star slash,
                     *  counting the lines the comment spans.
                     */
                    char* pzEnd = strstr( pzS+2, "*/" );
                    if (pzEnd == NULL)
                        return NULL;

                    while (pzS < pzEnd) {
                        if (*(pzS++) == '\n')
                            (*pLineCt)++;
                    }

                    pzS = pzEnd + 2;
                }

                else
                    return NULL;  /* a slash that starts no comment */
            }

            /*
             *  IF the next non-whitespace character is not a quote,
             *  THEN the series of quoted strings has come to an end.
             */
            if ((*pzS != '"') && (*pzS != '\''))
                return pzS;

            q = *(pzS++);  /* assign new quote character and advance scan */
        }

        /*
         *  We are inside a quoted string.  Copy text.
         */
        switch (*(pzD++) = *(pzS++)) {
        case '\0':
            return NULL;   /* input ended before the closing quote */

        case '\n':
            (*pLineCt)++;
            break;

        case '\\':
            /*
             *  IF we are escaping a new line,
             *  THEN drop both the escape and the newline from
             *       the result string.
             */
            if (*pzS == '\n') {
                pzS++;
                pzD--;
                (*pLineCt)++;
            }

            /*
             *  ELSE IF the quote character is not a single quote,
             *  THEN we do the full escape character processing.
             *  The decoded byte replaces the backslash just copied.
             */
            else if (q != '\'') {
                unsigned int ct =
                    ao_string_cook_escape_char( pzS, pzD-1, (unsigned int)'\n' );
                if (ct == 0)
                    return NULL;

                pzS += ct;
            }

            /*
             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
             *  The latter only to easily hide preprocessing directives.
             *  Any other backslash is kept as ordinary text.
             */
            else switch (*pzS) {
            case '\\':
            case '\'':
            case '#':
                pzD[-1] = *pzS++;
                break;

            default:
                break;
            }
            break;

        default:
            break;
        }     /* switch (*(pzD++) = *(pzS++))    */
    }         /* for (;;)                        */
}
348/*
349 * Local Variables:
350 * mode: C
351 * c-file-style: "stroustrup"
352 * indent-tabs-mode: nil
353 * End:
354 * end of autoopts/cook.c */
355