1285612Sdelphij/**
2285612Sdelphij * \file cook.c
3181834Sroberto *
4181834Sroberto *  This file contains the routines that deal with processing quoted strings
5181834Sroberto *  into an internal format.
6285612Sdelphij *
7285612Sdelphij * @addtogroup autoopts
8285612Sdelphij * @{
9181834Sroberto */
10181834Sroberto/*
11285612Sdelphij *  This file is part of AutoOpts, a companion to AutoGen.
12285612Sdelphij *  AutoOpts is free software.
13285612Sdelphij *  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
14181834Sroberto *
15285612Sdelphij *  AutoOpts is available under any one of two licenses.  The license
16285612Sdelphij *  in use must be one of these two and the choice is under the control
17285612Sdelphij *  of the user of the license.
18181834Sroberto *
19285612Sdelphij *   The GNU Lesser General Public License, version 3 or later
20285612Sdelphij *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
21181834Sroberto *
22285612Sdelphij *   The Modified Berkeley Software Distribution License
23285612Sdelphij *      See the file "COPYING.mbsd"
24181834Sroberto *
25285612Sdelphij *  These files have the following sha256 sums:
26181834Sroberto *
27285612Sdelphij *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
28285612Sdelphij *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
29285612Sdelphij *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
30181834Sroberto */
31181834Sroberto
32181834Sroberto/* = = = START-STATIC-FORWARD = = = */
33285612Sdelphijstatic bool
34285612Sdelphijcontiguous_quote(char ** pps, char * pq, int * lnct_p);
35181834Sroberto/* = = = END-STATIC-FORWARD = = = */
36181834Sroberto
/*=export_func  ao_string_cook_escape_char
 * private:
 *
 * what:  escape-process a string fragment
 * arg:   + char const * + pzIn  + points to character after the escape +
 * arg:   + char *       + pRes  + Where to put the result byte +
 * arg:   + unsigned int + nl    + replacement char if scanned char is \n +
 *
 * ret-type: unsigned int
 * ret-desc: The number of bytes consumed processing the escaped character.
 *
 * doc:
 *
 *  The text following a backslash is translated into the one byte it
 *  stands for, and that byte is stored through @code{pRes}.  The letters
 *  "abfnrtv" yield the usual control characters.  An "x" or "X" followed
 *  by one or two hex digits yields the byte with that value.  One to
 *  three octal digits yield the byte with that value, limited to 0xFF.
 *  Any other character is passed through unchanged and the caller is
 *  told to advance the scan by one.
 *
 *  An escaped newline (a bare newline, or a carriage return immediately
 *  followed by a newline) stores the @code{nl} byte instead.  Some
 *  applications need to keep an escaped newline, others need to suppress
 *  it; supply a replacement that cannot otherwise appear (0x7F, for
 *  example) and filter it out afterwards when suppression is wanted.
 *
 * err:  Zero is returned if the character after the escape is the NUL
 *       that terminates the string.
=*/
unsigned int
ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
{
    /* the single-letter escapes and the byte each of them denotes */
    static char const letters[] = { 'a',  'b',  'f',  'n', 'r',  't',  'v'  };
    static char const values[]  = { '\a', '\b', '\f', NL,  '\r', '\t', '\v' };

    char const   esc = *pzIn;   /* the character being escaped */
    unsigned int ix;

    /* unless a rule below says otherwise, the character stands for itself */
    *pRes = esc;

    if (esc == NUL)             /* nothing follows the backslash */
        return 0;

    if (esc == NL) {
        *pRes = (char)nl;
        return 1;
    }

    if (esc == '\r') {
        /* a lone carriage return is kept; a CR/NL pair is one newline */
        if (pzIn[1] != NL)
            return 1;
        *pRes = (char)nl;
        return 2;
    }

    for (ix = 0; ix < sizeof(letters); ix++) {
        if (esc == letters[ix]) {
            *pRes = values[ix];
            return 1;
        }
    }

    if ((esc == 'x') || (esc == 'X')) {
        char const * digits = pzIn + 1;
        char         hex[3];
        unsigned int len = 0;

        /* collect at most two hex digits */
        while ((len < 2) && IS_HEX_DIGIT_CHAR(digits[len])) {
            hex[len] = digits[len];
            len++;
        }

        /* no digits at all: the 'x' itself is the result */
        if (len == 0)
            return 1;

        hex[len] = NUL;
        *pRes = (char)strtoul(hex, NULL, 16);
        return len + 1;         /* the digits plus the 'x' */
    }

    if ((esc >= '0') && (esc <= '7')) {
        char          oct[4];
        unsigned int  len = 1;
        unsigned long val;

        /* the escaped character is the first digit; take up to two more */
        oct[0] = esc;
        while ((len < 3) && IS_OCT_DIGIT_CHAR(pzIn[len])) {
            oct[len] = pzIn[len];
            len++;
        }
        oct[len] = NUL;

        /* three octal digits can exceed one byte, so cap the value */
        val   = strtoul(oct, NULL, 8);
        *pRes = (char)((val > 0xFF) ? 0xFF : val);
        return len;
    }

    /* any other quoted character is its own result */
    return 1;
}
137285612Sdelphij
138285612Sdelphij
/**
 *  Decide whether another quoted string follows the one that just ended.
 *
 *  On entry, @a *pps addresses the closing quote of the string just
 *  processed.  White space and C style comments (both "//" and block
 *  comments) after that quote are skipped, and every newline skipped
 *  this way is added to @a *lnct_p.
 *
 *  @param[in,out] pps     scan position.  Updated to the character after
 *                         the next opening quote when one is found, to the
 *                         first other character when the series of strings
 *                         has ended, or to NULL when a '/' does not start a
 *                         comment or a comment is never terminated.
 *  @param[out]    pq      receives the new quote character; written only
 *                         when another string is found.
 *  @param[in,out] lnct_p  running line count; must not be NULL.
 *
 *  @returns true when another quoted string follows, false otherwise.
 */
static bool
contiguous_quote(char ** pps, char * pq, int * lnct_p)
{
    char * scan = *pps + 1;     /* step over the closing quote */

    for (;;) {
        char * resume;          /* where scanning continues after a comment */

        for (; IS_WHITESPACE_CHAR(*scan); scan++) {
            if (*scan == NL)
                (*lnct_p)++;
        }

        /*
         *  A quote character here means the strings get concatenated:
         *  hand back the new quote character and the start of its text.
         */
        if ((*scan == '"') || (*scan == '\'')) {
            *pq  = *scan;
            *pps = scan + 1;
            return true;
        }

        /*
         *  Anything other than a possible comment ends the series
         *  of quoted strings.
         */
        if (*scan != '/') {
            *pps = scan;
            return false;
        }

        if (scan[1] == '/') {
            /*
             *  Line comment: resume at the newline itself, so the
             *  white space loop above consumes and counts it.
             */
            resume = strchr(scan, NL);

        } else if (scan[1] == '*') {
            /*
             *  Block comment: count the newlines inside it and resume
             *  just past the terminating star slash.
             */
            resume = strstr(scan + 2, "*/");
            if (resume != NULL) {
                for (; scan < resume; scan++) {
                    if (*scan == NL)
                        (*lnct_p)++;
                }
                resume += 2;
            }

        } else {
            /* a slash that does not introduce a comment */
            resume = NULL;
        }

        if (resume == NULL) {
            *pps = NULL;
            return false;
        }

        scan = resume;
    }
}
216181834Sroberto
217181834Sroberto/*=export_func  ao_string_cook
218181834Sroberto * private:
219181834Sroberto *
220181834Sroberto * what:  concatenate and escape-process strings
221285612Sdelphij * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
222285612Sdelphij * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
223181834Sroberto *
224285612Sdelphij * ret-type: char *
225181834Sroberto * ret-desc: The address of the text following the processed strings.
226181834Sroberto *           The return value is NULL if the strings are ill-formed.
227181834Sroberto *
228181834Sroberto * doc:
229181834Sroberto *
230181834Sroberto *  A series of one or more quoted strings are concatenated together.
231181834Sroberto *  If they are quoted with double quotes (@code{"}), then backslash
232181834Sroberto *  escapes are processed per the C programming language.  If they are
233181834Sroberto *  single quote strings, then the backslashes are honored only when they
234181834Sroberto *  precede another backslash or a single quote character.
235181834Sroberto *
236181834Sroberto * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
237181834Sroberto=*/
238285612Sdelphijchar *
239285612Sdelphijao_string_cook(char * pzScan, int * lnct_p)
240181834Sroberto{
241181834Sroberto    int   l = 0;
242181834Sroberto    char  q = *pzScan;
243181834Sroberto
244181834Sroberto    /*
245181834Sroberto     *  It is a quoted string.  Process the escape sequence characters
246181834Sroberto     *  (in the set "abfnrtv") and make sure we find a closing quote.
247181834Sroberto     */
248285612Sdelphij    char * pzD = pzScan++;
249285612Sdelphij    char * pzS = pzScan;
250181834Sroberto
251285612Sdelphij    if (lnct_p == NULL)
252285612Sdelphij        lnct_p = &l;
253181834Sroberto
254181834Sroberto    for (;;) {
255181834Sroberto        /*
256181834Sroberto         *  IF the next character is the quote character, THEN we may end the
257181834Sroberto         *  string.  We end it unless the next non-blank character *after* the
258181834Sroberto         *  string happens to also be a quote.  If it is, then we will change
259181834Sroberto         *  our quote character to the new quote character and continue
260181834Sroberto         *  condensing text.
261181834Sroberto         */
262181834Sroberto        while (*pzS == q) {
263181834Sroberto            *pzD = NUL; /* This is probably the end of the line */
264285612Sdelphij            if (! contiguous_quote(&pzS, &q, lnct_p))
265181834Sroberto                return pzS;
266181834Sroberto        }
267181834Sroberto
268181834Sroberto        /*
269181834Sroberto         *  We are inside a quoted string.  Copy text.
270181834Sroberto         */
271181834Sroberto        switch (*(pzD++) = *(pzS++)) {
272181834Sroberto        case NUL:
273181834Sroberto            return NULL;
274181834Sroberto
275285612Sdelphij        case NL:
276285612Sdelphij            (*lnct_p)++;
277181834Sroberto            break;
278181834Sroberto
279181834Sroberto        case '\\':
280181834Sroberto            /*
281181834Sroberto             *  IF we are escaping a new line,
282181834Sroberto             *  THEN drop both the escape and the newline from
283181834Sroberto             *       the result string.
284181834Sroberto             */
285285612Sdelphij            if (*pzS == NL) {
286181834Sroberto                pzS++;
287181834Sroberto                pzD--;
288285612Sdelphij                (*lnct_p)++;
289181834Sroberto            }
290181834Sroberto
291181834Sroberto            /*
292181834Sroberto             *  ELSE IF the quote character is '"' or '`',
293181834Sroberto             *  THEN we do the full escape character processing
294181834Sroberto             */
295181834Sroberto            else if (q != '\'') {
296285612Sdelphij                unsigned int ct;
297285612Sdelphij                ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
298181834Sroberto                if (ct == 0)
299181834Sroberto                    return NULL;
300181834Sroberto
301181834Sroberto                pzS += ct;
302181834Sroberto            }     /* if (q != '\'')                  */
303181834Sroberto
304181834Sroberto            /*
305181834Sroberto             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
306181834Sroberto             *  The latter only to easily hide preprocessing directives.
307181834Sroberto             */
308181834Sroberto            else switch (*pzS) {
309181834Sroberto            case '\\':
310181834Sroberto            case '\'':
311181834Sroberto            case '#':
312181834Sroberto                pzD[-1] = *pzS++;
313181834Sroberto            }
314181834Sroberto        }     /* switch (*(pzD++) = *(pzS++))    */
315181834Sroberto    }         /* for (;;)                        */
316181834Sroberto}
317285612Sdelphij
318285612Sdelphij/** @}
319285612Sdelphij *
320181834Sroberto * Local Variables:
321181834Sroberto * mode: C
322181834Sroberto * c-file-style: "stroustrup"
323181834Sroberto * indent-tabs-mode: nil
324181834Sroberto * End:
325181834Sroberto * end of autoopts/cook.c */
326