1//
2// Automated Testing Framework (atf)
3//
4// Copyright (c) 2007, 2008, 2010 The NetBSD Foundation, Inc.
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions
9// are met:
10// 1. Redistributions of source code must retain the above copyright
11//    notice, this list of conditions and the following disclaimer.
12// 2. Redistributions in binary form must reproduce the above copyright
13//    notice, this list of conditions and the following disclaimer in the
14//    documentation and/or other materials provided with the distribution.
15//
16// THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND
17// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
18// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20// IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY
21// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29
30#if !defined(_ATF_CXX_PARSER_HPP_)
31#define _ATF_CXX_PARSER_HPP_
32
33#include <istream>
34#include <map>
35#include <ostream>
36#include <stdexcept>
37#include <string>
38#include <utility>
39#include <vector>
40
41namespace atf {
42namespace parser {
43
44// ------------------------------------------------------------------------
45// The "parse_error" class.
46// ------------------------------------------------------------------------
47
//!
//! \brief A single parsing error tied to a line of the input.
//!
//! The error is both a std::runtime_error and a (line number, message)
//! pair; elsewhere in this header it is constructed from a token's line
//! number and a textual description of the problem.
//!
class parse_error : public std::runtime_error,
                    public std::pair< size_t, std::string > {
    // Mutable so that the const what() can use it.
    // NOTE(review): presumably caches the formatted message; confirm
    // against the implementation file.
    mutable std::string m_msg;

public:
    //! Builds an error from a line number and a message.
    parse_error(size_t lineno, std::string message);
    ~parse_error() throw();

    //! Textual description of the error.
    const char* what() const throw();

    //! Conversion of the error to a string.
    operator std::string() const;
};
60
61// ------------------------------------------------------------------------
62// The "parse_errors" class.
63// ------------------------------------------------------------------------
64
65class parse_errors : public std::runtime_error,
66                     public std::vector< parse_error > {
67    std::vector< parse_error > m_errors;
68    mutable std::string m_msg;
69
70public:
71    parse_errors(void);
72    ~parse_errors(void) throw();
73
74    const char* what(void) const throw();
75};
76
77// ------------------------------------------------------------------------
78// The "format_error" class.
79// ------------------------------------------------------------------------
80
//!
//! \brief Runtime error built from a textual description.
//!
//! The constructor is intentionally left implicit to keep existing
//! conversions from std::string working.
//!
class format_error : public std::runtime_error {
public:
    format_error(const std::string& message);
};
85
86// ------------------------------------------------------------------------
87// The "token" class.
88// ------------------------------------------------------------------------
89
//! Numeric identifier of a kind of token.  The values are supplied by the
//! code that configures a tokenizer (end-of-file, newline, text,
//! delimiters, keywords).
typedef int token_type;

//!
//! \brief Representation of a read token.
//!
//! Holds the information of a token read from a stream: the line it was
//! read from, its type and its associated text, if any.
//!
//! This is declared as a struct, so the data members below are publicly
//! accessible even though they carry the m_ prefix.
//!
struct token {
    // NOTE(review): presumably false for default-constructed tokens and
    // reported by the bool conversions; confirm in the implementation.
    bool m_inited;
    size_t m_line;
    token_type m_type;
    std::string m_text;

public:
    //! Creates a placeholder token; the tokenizer uses this to clear its
    //! pending look-ahead token.
    token();

    //! Creates a token read at line p_line, of type p_type and with the
    //! optional text p_text.
    token(size_t p_line, const token_type& p_type,
          const std::string& p_text = "");

    size_t lineno() const;
    const token_type& type() const;
    const std::string& text() const;

    //! Used by the tokenizer to test whether a look-ahead token is set.
    operator bool() const;
    bool operator!() const;
};
115
116// ------------------------------------------------------------------------
117// The "tokenizer" class.
118// ------------------------------------------------------------------------
119
120//!
121//! \brief A stream tokenizer.
122//!
123//! This template implements an extremely simple, line-oriented stream
124//! tokenizer.  It is only able to recognize one character-long delimiters,
125//! random-length keywords, skip whitespace and, anything that does not
126//! match these rules is supposed to be a word.
127//!
128//! Parameter IS: The input stream's type.
129//!
130template< class IS >
131class tokenizer {
132    IS& m_is;
133    size_t m_lineno;
134    token m_la;
135
136    bool m_skipws;
137    token_type m_eof_type, m_nl_type, m_text_type;
138
139    std::map< char, token_type > m_delims_map;
140    std::string m_delims_str;
141
142    char m_quotech;
143    token_type m_quotetype;
144
145    std::map< std::string, token_type > m_keywords_map;
146
147    token_type alloc_type(void);
148
149    template< class TKZ >
150    friend
151    class parser;
152
153public:
154    tokenizer(IS&, bool, const token_type&, const token_type&,
155              const token_type&, size_t = 1);
156
157    size_t lineno(void) const;
158
159    void add_delim(char, const token_type&);
160    void add_keyword(const std::string&, const token_type&);
161    void add_quote(char, const token_type&);
162
163    token next(void);
164    std::string rest_of_line(void);
165};
166
167template< class IS >
168tokenizer< IS >::tokenizer(IS& p_is,
169                           bool p_skipws,
170                           const token_type& p_eof_type,
171                           const token_type& p_nl_type,
172                           const token_type& p_text_type,
173                           size_t p_lineno) :
174    m_is(p_is),
175    m_lineno(p_lineno),
176    m_skipws(p_skipws),
177    m_eof_type(p_eof_type),
178    m_nl_type(p_nl_type),
179    m_text_type(p_text_type),
180    m_quotech(-1)
181{
182}
183
184template< class IS >
185size_t
186tokenizer< IS >::lineno(void)
187    const
188{
189    return m_lineno;
190}
191
192template< class IS >
193void
194tokenizer< IS >::add_delim(char delim, const token_type& type)
195{
196    m_delims_map[delim] = type;
197    m_delims_str += delim;
198}
199
200template< class IS >
201void
202tokenizer< IS >::add_keyword(const std::string& keyword,
203                             const token_type& type)
204{
205    m_keywords_map[keyword] = type;
206}
207
208template< class IS >
209void
210tokenizer< IS >::add_quote(char ch, const token_type& type)
211{
212    m_quotech = ch;
213    m_quotetype = type;
214}
215
216template< class IS >
217token
218tokenizer< IS >::next(void)
219{
220    if (m_la) {
221        token t = m_la;
222        m_la = token();
223        if (t.type() == m_nl_type)
224            m_lineno++;
225        return t;
226    }
227
228    char ch;
229    std::string text;
230
231    bool done = false, quoted = false;
232    token t(m_lineno, m_eof_type, "<<EOF>>");
233    while (!done && m_is.get(ch).good()) {
234        if (ch == m_quotech) {
235            if (text.empty()) {
236                bool escaped = false;
237                while (!done && m_is.get(ch).good()) {
238                    if (!escaped) {
239                        if (ch == '\\')
240                            escaped = true;
241                        else if (ch == '\n') {
242                            m_la = token(m_lineno, m_nl_type, "<<NEWLINE>>");
243                            throw parse_error(t.lineno(),
244                                              "Missing double quotes before "
245                                              "end of line");
246                        } else if (ch == m_quotech)
247                            done = true;
248                        else
249                            text += ch;
250                    } else {
251                        text += ch;
252                        escaped = false;
253                    }
254                }
255                if (!m_is.good())
256                    throw parse_error(t.lineno(),
257                                      "Missing double quotes before "
258                                      "end of file");
259                t = token(m_lineno, m_text_type, text);
260                quoted = true;
261            } else {
262                m_is.unget();
263                done = true;
264            }
265        } else {
266            typename std::map< char, token_type >::const_iterator idelim;
267            idelim = m_delims_map.find(ch);
268            if (idelim != m_delims_map.end()) {
269                done = true;
270                if (text.empty())
271                    t = token(m_lineno, (*idelim).second,
272                                   std::string("") + ch);
273                else
274                    m_is.unget();
275            } else if (ch == '\n') {
276                done = true;
277                if (text.empty())
278                    t = token(m_lineno, m_nl_type, "<<NEWLINE>>");
279                else
280                    m_is.unget();
281            } else if (m_skipws && (ch == ' ' || ch == '\t')) {
282                if (!text.empty())
283                    done = true;
284            } else
285                text += ch;
286        }
287    }
288
289    if (!quoted && !text.empty()) {
290        typename std::map< std::string, token_type >::const_iterator ikw;
291        ikw = m_keywords_map.find(text);
292        if (ikw != m_keywords_map.end())
293            t = token(m_lineno, (*ikw).second, text);
294        else
295            t = token(m_lineno, m_text_type, text);
296    }
297
298    if (t.type() == m_nl_type)
299        m_lineno++;
300
301    return t;
302}
303
304template< class IS >
305std::string
306tokenizer< IS >::rest_of_line(void)
307{
308    std::string str;
309    while (m_is.good() && m_is.peek() != '\n')
310        str += m_is.get();
311    return str;
312}
313
314// ------------------------------------------------------------------------
315// The "parser" class.
316// ------------------------------------------------------------------------
317
318template< class TKZ >
319class parser {
320    TKZ& m_tkz;
321    token m_last;
322    parse_errors m_errors;
323    bool m_thrown;
324
325public:
326    parser(TKZ& tkz);
327    ~parser(void);
328
329    bool good(void) const;
330    void add_error(const parse_error&);
331    bool has_errors(void) const;
332
333    token next(void);
334    std::string rest_of_line(void);
335    token reset(const token_type&);
336
337    token
338    expect(const token_type&,
339           const std::string&);
340
341    token
342    expect(const token_type&,
343           const token_type&,
344           const std::string&);
345
346    token
347    expect(const token_type&,
348           const token_type&,
349           const token_type&,
350           const std::string&);
351
352    token
353    expect(const token_type&,
354           const token_type&,
355           const token_type&,
356           const token_type&,
357           const std::string&);
358
359    token
360    expect(const token_type&,
361           const token_type&,
362           const token_type&,
363           const token_type&,
364           const token_type&,
365           const token_type&,
366           const token_type&,
367           const std::string&);
368
369    token
370    expect(const token_type&,
371           const token_type&,
372           const token_type&,
373           const token_type&,
374           const token_type&,
375           const token_type&,
376           const token_type&,
377           const token_type&,
378           const std::string&);
379};
380
381template< class TKZ >
382parser< TKZ >::parser(TKZ& tkz) :
383    m_tkz(tkz),
384    m_thrown(false)
385{
386}
387
388template< class TKZ >
389parser< TKZ >::~parser(void)
390{
391    if (!m_errors.empty() && !m_thrown)
392        throw m_errors;
393}
394
395template< class TKZ >
396bool
397parser< TKZ >::good(void)
398    const
399{
400    return m_tkz.m_is.good();
401}
402
403template< class TKZ >
404void
405parser< TKZ >::add_error(const parse_error& pe)
406{
407    m_errors.push_back(pe);
408}
409
410template< class TKZ >
411bool
412parser< TKZ >::has_errors(void)
413    const
414{
415    return !m_errors.empty();
416}
417
418template< class TKZ >
419token
420parser< TKZ >::next(void)
421{
422    token t = m_tkz.next();
423
424    m_last = t;
425
426    if (t.type() == m_tkz.m_eof_type) {
427        if (!m_errors.empty()) {
428            m_thrown = true;
429            throw m_errors;
430        }
431    }
432
433    return t;
434}
435
436template< class TKZ >
437std::string
438parser< TKZ >::rest_of_line(void)
439{
440    return m_tkz.rest_of_line();
441}
442
443template< class TKZ >
444token
445parser< TKZ >::reset(const token_type& stop)
446{
447    token t = m_last;
448
449    while (t.type() != m_tkz.m_eof_type && t.type() != stop)
450        t = next();
451
452    return t;
453}
454
455template< class TKZ >
456token
457parser< TKZ >::expect(const token_type& t1,
458                      const std::string& textual)
459{
460    token t = next();
461
462    if (t.type() != t1)
463        throw parse_error(t.lineno(),
464                          "Unexpected token `" + t.text() +
465                          "'; expected " + textual);
466
467    return t;
468}
469
470template< class TKZ >
471token
472parser< TKZ >::expect(const token_type& t1,
473                      const token_type& t2,
474                      const std::string& textual)
475{
476    token t = next();
477
478    if (t.type() != t1 && t.type() != t2)
479        throw parse_error(t.lineno(),
480                          "Unexpected token `" + t.text() +
481                          "'; expected " + textual);
482
483    return t;
484}
485
486template< class TKZ >
487token
488parser< TKZ >::expect(const token_type& t1,
489                      const token_type& t2,
490                      const token_type& t3,
491                      const std::string& textual)
492{
493    token t = next();
494
495    if (t.type() != t1 && t.type() != t2 && t.type() != t3)
496        throw parse_error(t.lineno(),
497                          "Unexpected token `" + t.text() +
498                          "'; expected " + textual);
499
500    return t;
501}
502
503template< class TKZ >
504token
505parser< TKZ >::expect(const token_type& t1,
506                      const token_type& t2,
507                      const token_type& t3,
508                      const token_type& t4,
509                      const std::string& textual)
510{
511    token t = next();
512
513    if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
514        t.type() != t4)
515        throw parse_error(t.lineno(),
516                          "Unexpected token `" + t.text() +
517                          "'; expected " + textual);
518
519    return t;
520}
521
522template< class TKZ >
523token
524parser< TKZ >::expect(const token_type& t1,
525                      const token_type& t2,
526                      const token_type& t3,
527                      const token_type& t4,
528                      const token_type& t5,
529                      const token_type& t6,
530                      const token_type& t7,
531                      const std::string& textual)
532{
533    token t = next();
534
535    if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
536        t.type() != t4 && t.type() != t5 && t.type() != t6 &&
537        t.type() != t7)
538        throw parse_error(t.lineno(),
539                          "Unexpected token `" + t.text() +
540                          "'; expected " + textual);
541
542    return t;
543}
544
545template< class TKZ >
546token
547parser< TKZ >::expect(const token_type& t1,
548                      const token_type& t2,
549                      const token_type& t3,
550                      const token_type& t4,
551                      const token_type& t5,
552                      const token_type& t6,
553                      const token_type& t7,
554                      const token_type& t8,
555                      const std::string& textual)
556{
557    token t = next();
558
559    if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
560        t.type() != t4 && t.type() != t5 && t.type() != t6 &&
561        t.type() != t7 && t.type() != t8)
562        throw parse_error(t.lineno(),
563                          "Unexpected token `" + t.text() +
564                          "'; expected " + textual);
565
566    return t;
567}
568
// Runs the statement 'func' (typically a callback invocation) only if
// 'parser' has no recorded errors.  Wrapped in do/while so that the macro
// behaves as a single statement, e.g. as the body of an if/else.
#define ATF_PARSER_CALLBACK(parser, func) \
    do { \
        if (!(parser).has_errors()) { \
            func; \
        } \
    } while (false)
574
575// ------------------------------------------------------------------------
576// Header parsing.
577// ------------------------------------------------------------------------
578
//! Attributes of a header entry, as a map from strings to strings.
typedef std::map< std::string, std::string > attrs_map;

//!
//! \brief A header entry: a name, a value and a set of attributes.
//!
class header_entry {
    std::string m_name;
    std::string m_value;
    attrs_map m_attrs;

public:
    header_entry();
    header_entry(const std::string& name, const std::string& value,
                 attrs_map attrs = attrs_map());

    const std::string& name() const;
    const std::string& value() const;
    const attrs_map& attrs() const;

    // NOTE(review): the behavior of get_attr() for an attribute that is
    // not present is defined by the implementation file; check it, or
    // call has_attr() first.
    bool has_attr(const std::string& attr) const;
    const std::string& get_attr(const std::string& attr) const;
};
597
598typedef std::map< std::string, header_entry > headers_map;
599
600std::pair< size_t, headers_map > read_headers(std::istream&, size_t);
601void write_headers(const headers_map&, std::ostream&);
602void validate_content_type(const headers_map&, const std::string&, int);
603
604} // namespace parser
605} // namespace atf
606
607#endif // !defined(_ATF_CXX_PARSER_HPP_)
608