1/**
2 * \file cook.c
3 *
4 *  This file contains the routines that deal with processing quoted strings
5 *  into an internal format.
6 *
7 * @addtogroup autoopts
8 * @{
9 */
10/*
11 *  This file is part of AutoOpts, a companion to AutoGen.
12 *  AutoOpts is free software.
13 *  AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
14 *
15 *  AutoOpts is available under any one of two licenses.  The license
16 *  in use must be one of these two and the choice is under the control
17 *  of the user of the license.
18 *
19 *   The GNU Lesser General Public License, version 3 or later
20 *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
21 *
22 *   The Modified Berkeley Software Distribution License
23 *      See the file "COPYING.mbsd"
24 *
25 *  These files have the following sha256 sums:
26 *
27 *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
28 *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
29 *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
30 */
31
32/*=export_func  ao_string_cook_escape_char
33 * private:
34 *
35 * what:  escape-process a string fragment
36 * arg:   + char const * + pzScan  + points to character after the escape +
37 * arg:   + char *       + pRes    + Where to put the result byte +
38 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
39 *
40 * ret-type: unsigned int
41 * ret-desc: The number of bytes consumed processing the escaped character.
42 *
43 * doc:
44 *
45 *  This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones:  hex and octal, too.
47 *  The returned result tells the caller how far to advance the
48 *  scan pointer (passed in).  The default is to just pass through the
49 *  escaped character and advance the scan by one.
50 *
51 *  Some applications need to keep an escaped newline, others need to
52 *  suppress it.  This is accomplished by supplying a '\n' replacement
53 *  character that is different from \n, if need be.  For example, use
54 *  0x7F and never emit a 0x7F.
55 *
 * err:  Zero is returned if the character after the escape is NUL (end of input).
57=*/
58unsigned int
59ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
60{
61    unsigned int res = 1;
62
63    switch (*pRes = *pzIn++) {
64    case NUL:         /* NUL - end of input string */
65        return 0;
66    case '\r':
67        if (*pzIn != NL)
68            return 1;
69        res++;
70        /* FALLTHROUGH */
71    case NL:        /* NL  - emit newline        */
72        *pRes = (char)nl;
73        return res;
74
75    case 'a': *pRes = '\a'; break;
76    case 'b': *pRes = '\b'; break;
77    case 'f': *pRes = '\f'; break;
78    case 'n': *pRes = NL;   break;
79    case 'r': *pRes = '\r'; break;
80    case 't': *pRes = '\t'; break;
81    case 'v': *pRes = '\v'; break;
82
83    case 'x':
84    case 'X':         /* HEX Escape       */
85        if (IS_HEX_DIGIT_CHAR(*pzIn))  {
86            char z[4];
87            unsigned int ct = 0;
88
89            do  {
90                z[ct] = pzIn[ct];
91                if (++ct >= 2)
92                    break;
93            } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
94            z[ct] = NUL;
95            *pRes = (char)strtoul(z, NULL, 16);
96            return ct + 1;
97        }
98        break;
99
100    case '0': case '1': case '2': case '3':
101    case '4': case '5': case '6': case '7':
102    {
103        /*
104         *  IF the character copied was an octal digit,
105         *  THEN set the output character to an octal value.
106         *  The 3 octal digit result might exceed 0xFF, so check it.
107         */
108        char z[4];
109        unsigned long val;
110        unsigned int  ct = 0;
111
112        z[ct++] = *--pzIn;
113        while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
114            z[ct] = pzIn[ct];
115            if (++ct >= 3)
116                break;
117        }
118
119        z[ct] = NUL;
120        val = strtoul(z, NULL, 8);
121        if (val > 0xFF)
122            val = 0xFF;
123        *pRes = (char)val;
124        return ct;
125    }
126
127    default: /* quoted character is result character */;
128    }
129
130    return res;
131}
132
133/**
134 * count newlines between start and end
135 */
136static char *
137nl_count(char * start, char * end, int * lnct_p)
138{
139    while (start < end) {
140        if (*(start++) == NL)
141            (*lnct_p)++;
142    }
143    return end;
144}
145
146/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
147 *
148 *  A quoted string has been found.
149 *  Find the end of it and compress any escape sequences.
150 */
151static bool
152contiguous_quote(char ** pps, char * pq, int * lnct_p)
153{
154    char * ps = *pps + 1;
155
156    for (;;) {
157        while (IS_WHITESPACE_CHAR(*ps))
158            if (*(ps++) == NL)
159                (*lnct_p)++;
160
161        /*
162         *  IF the next character is a quote character,
163         *  THEN we will concatenate the strings.
164         */
165        switch (*ps) {
166        case '"':
167        case '\'':
168            *pq  = *(ps++);  /* assign new quote character and return */
169            *pps = ps;
170            return true;
171
172        case '/':
173            /*
174             *  Allow for a comment embedded in the concatenated string.
175             */
176            switch (ps[1]) {
177            default:
178                goto fail_return;
179
180            case '/':
181                /*
182                 *  Skip to end of line
183                 */
184                ps = strchr(ps, NL);
185                if (ps == NULL)
186                    goto fail_return;
187                break;
188
189            case '*':
190                ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
191                if (ps == NULL)
192                    goto fail_return;
193                ps += 2;
194            }
195            continue;
196
197        default:
198            /*
199             *  The next non-whitespace character is not a quote.
200             *  The series of quoted strings has come to an end.
201             */
202            *pps = ps;
203            return false;
204        }
205    }
206
207 fail_return:
208    *pps = NULL;
209    return false;
210}
211
212/*=export_func  ao_string_cook
213 * private:
214 *
215 * what:  concatenate and escape-process strings
216 * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
217 * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
218 *
219 * ret-type: char *
220 * ret-desc: The address of the text following the processed strings.
221 *           The return value is NULL if the strings are ill-formed.
222 *
223 * doc:
224 *
225 *  A series of one or more quoted strings are concatenated together.
226 *  If they are quoted with double quotes (@code{"}), then backslash
227 *  escapes are processed per the C programming language.  If they are
228 *  single quote strings, then the backslashes are honored only when they
229 *  precede another backslash or a single quote character.
230 *
231 * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
232=*/
233char *
234ao_string_cook(char * pzScan, int * lnct_p)
235{
236    int   l = 0;
237    char  q = *pzScan;
238
239    /*
240     *  It is a quoted string.  Process the escape sequence characters
241     *  (in the set "abfnrtv") and make sure we find a closing quote.
242     */
243    char * pzD = pzScan++;
244    char * pzS = pzScan;
245
246    if (lnct_p == NULL)
247        lnct_p = &l;
248
249    for (;;) {
250        /*
251         *  IF the next character is the quote character, THEN we may end the
252         *  string.  We end it unless the next non-blank character *after* the
253         *  string happens to also be a quote.  If it is, then we will change
254         *  our quote character to the new quote character and continue
255         *  condensing text.
256         */
257        while (*pzS == q) {
258            *pzD = NUL; /* This is probably the end of the line */
259            if (! contiguous_quote(&pzS, &q, lnct_p))
260                return pzS;
261        }
262
263        /*
264         *  We are inside a quoted string.  Copy text.
265         */
266        switch (*(pzD++) = *(pzS++)) {
267        case NUL:
268            return NULL;
269
270        case NL:
271            (*lnct_p)++;
272            break;
273
274        case '\\':
275            /*
276             *  IF we are escaping a new line,
277             *  THEN drop both the escape and the newline from
278             *       the result string.
279             */
280            if (*pzS == NL) {
281                pzS++;
282                pzD--;
283                (*lnct_p)++;
284            }
285
286            /*
287             *  ELSE IF the quote character is '"' or '`',
288             *  THEN we do the full escape character processing
289             */
290            else if (q != '\'') {
291                unsigned int ct;
292                ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
293                if (ct == 0)
294                    return NULL;
295
296                pzS += ct;
297            }     /* if (q != '\'')                  */
298
299            /*
300             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
301             *  The latter only to easily hide preprocessing directives.
302             */
303            else switch (*pzS) {
304            case '\\':
305            case '\'':
306            case '#':
307                pzD[-1] = *pzS++;
308            }
309        }     /* switch (*(pzD++) = *(pzS++))    */
310    }         /* for (;;)                        */
311}
312
313/** @}
314 *
315 * Local Variables:
316 * mode: C
317 * c-file-style: "stroustrup"
318 * indent-tabs-mode: nil
319 * End:
320 * end of autoopts/cook.c */
321