1/*
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10   Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
11   Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
12   Licensed under the MIT license:
13
14   Permission is  hereby granted,  free of charge,  to any  person obtaining
15   a  copy  of  this  software   and  associated  documentation  files  (the
16   "Software"),  to  deal in  the  Software  without restriction,  including
17   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
18   distribute, sublicense, and/or sell copies of the Software, and to permit
19   persons  to whom  the Software  is  furnished to  do so,  subject to  the
20   following conditions:
21
22   The above copyright  notice and this permission notice  shall be included
23   in all copies or substantial portions of the Software.
24
25   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
26   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
27   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
28   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
29   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
30   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
31   USE OR OTHER DEALINGS IN THE SOFTWARE.
32*/
33
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
35
/* Scan the next token of a media-type string such as
 * "text/xml; charset=utf-8".
 *
 * pp - in/out read position; advanced as input is consumed.
 *
 * Token kinds:
 *   - a separator: one of ';', '/' or '='.  The returned pointer addresses
 *     the separator and *pp is left just past it.
 *   - a quoted string: the returned pointer addresses the opening '"' and
 *     *pp is left just past the closing '"'.
 *   - an atom: a run of any other characters.  The returned pointer
 *     addresses its first character and *pp is left on the character that
 *     ended it (whitespace, '(', a separator, '"', or the terminating NUL).
 *
 * Whitespace and parenthesised comments (which may nest) in front of a
 * token are skipped.  A backslash makes the scanner step over the following
 * character without interpreting it.
 *
 * Returns a pointer to the start of the token, or 0 when there is none:
 * end of input before a token starts, an unterminated quoted string or
 * comment, a trailing backslash, or a ')' that does not close a comment.
 */
static const char *
getTok(const char **pp) {
  /* inComment means one level of comment nesting; inComment + 1 means two
     levels, and so on.  inAtom and inString sort below init so that
     "state > init" means "inside a comment". */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may legitimately run up to the end of the input (e.g. the
         "utf-8" in "charset=utf-8"); hand it back instead of dropping it.
         *pp stays on the NUL, so the next call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Inside a quoted string '(' is ordinary text; otherwise it opens a
         comment or deepens the nesting of the current one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* closes one comment level */
      else if (state != inString)
        return 0; /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* A separator is a token on its own: return it and step past it. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step onto the escaped character; the increment at the bottom of the
         loop then steps over it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        /* Closing quote: consume it and return the whole string. */
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
101
/* Compare the token [start, end) with key, ignoring the case of the token.
 *
 * start, end - bounds of the token; start may be null (no token).
 * key        - NUL-terminated string; must be lowercase ASCII.  A lowercase
 *              letter in key matches itself or its uppercase form; any
 *              other key character matches only itself.
 *
 * Returns 1 when the token has exactly the length and (case-folded)
 * characters of key, 0 otherwise, including when start is null.
 */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than key.  Without this check a token byte equal
       to 'A' - 'a' (0xE0 where char is signed) would "match" the
       terminator and the loop would read past the end of key. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only lowercase letters have an uppercase form to fold to. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}
113
114void
115getXMLCharset(const char *buf, char *charset) {
116  const char *next, *p;
117
118  charset[0] = '\0';
119  next = buf;
120  p = getTok(&next);
121  if (matchkey(p, next, "text"))
122    strcpy(charset, "us-ascii");
123  else if (! matchkey(p, next, "application"))
124    return;
125  p = getTok(&next);
126  if (! p || *p != '/')
127    return;
128  p = getTok(&next);
129  if (matchkey(p, next, "xml"))
130    isXml = 1;
131  p = getTok(&next);
132  while (p) {
133    if (*p == ';') {
134      p = getTok(&next);
135      if (matchkey(p, next, "charset")) {
136        p = getTok(&next);
137        if (p && *p == '=') {
138          p = getTok(&next);
139          if (p) {
140            char *s = charset;
141            if (*p == '"') {
142              while (++p != next - 1) {
143                if (*p == '\\')
144                  ++p;
145                if (s == charset + CHARSET_MAX - 1) {
146                  charset[0] = '\0';
147                  break;
148                }
149                *s++ = *p;
150              }
151              *s++ = '\0';
152            } else {
153              if (next - p > CHARSET_MAX - 1)
154                break;
155              while (p != next)
156                *s++ = *p++;
157              *s = 0;
158              break;
159            }
160          }
161        }
162      }
163    } else
164      p = getTok(&next);
165  }
166}
167
168int
169main(int argc, char **argv) {
170  char buf[CHARSET_MAX];
171  getXMLCharset(argv[1], buf);
172  printf("charset = \"%s\"\n", buf);
173  return 0;
174}
175