/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <stddef.h>
#include <string.h>
#include "xmlmime.h"

/* Scan the next lexical token of a header value, starting at *pp.

   Before a token starts, whitespace (space, CR, TAB, LF) and parenthesised
   comments are skipped; comments may nest.  Three kinds of token exist:

   - one of the single characters ';', '/' or '=': the returned pointer
     addresses that character and *pp is left just past it;
   - a quoted string: the returned pointer addresses the opening '"' and
     *pp is left just past the closing '"';
   - an atom (a run of any other characters): the returned pointer
     addresses its first character and *pp is left on the terminating
     character (whitespace, '(', ';', '/', '=', '"' or the final NUL).

   A backslash always consumes the character that follows it, in every
   state, so an escaped '"' does not end a quoted string.  In the initial
   state such an escaped pair is skipped without starting a token.

   No token length is returned; the caller uses the updated *pp as the end
   of the token.

   Returns NULL when the input ends before a token is complete (no token
   at all, unterminated string or comment, backslash as the last
   character) and when a ')' appears outside a comment or string,
   including directly inside an atom. */
static const char *
getTok(const char **pp) {
  /* inComment means one level of nesting; inComment+1 means two levels etc */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = NULL;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* End of input completes an atom; anything else is incomplete. */
      if (state == inAtom)
        return tokStart;
      return NULL;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* init -> inComment, or one comment level deeper. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* leave one comment level (possibly back to init) */
      else if (state != inString)
        return NULL; /* unmatched ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++; /* single-character token */
      break;
    case '\\':
      /* Step onto the escaped character; the increment at the bottom of
         the loop then steps past it. */
      ++*pp;
      if (**pp == '\0')
        return NULL;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default: /* inside a comment: quotes have no meaning */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/* Compare the token [start, end) with the NUL-terminated string key,
   ignoring the case of ASCII letters in the token.

   key must be lowercase ASCII.  A lowercase letter in key matches itself
   or its uppercase form; any other key character matches only itself.

   Returns 1 when the token equals key in full, otherwise 0.  A NULL start
   (no token) never matches. */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;
    /* The token is longer than key.  Stop here so that the loop never
       advances key past its terminator. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Apply the uppercase mapping to letters only; for other characters
       the arithmetic would yield an unrelated character code. */
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  /* The token is consumed; it matches only if key is consumed as well. */
  return *key == '\0';
}

117104349Sphkvoid
118355604SdelphijgetXMLCharset(const char *buf, char *charset) {
119104349Sphk  const char *next, *p;
120104349Sphk
121104349Sphk  charset[0] = '\0';
122104349Sphk  next = buf;
123104349Sphk  p = getTok(&next);
124104349Sphk  if (matchkey(p, next, "text"))
125104349Sphk    strcpy(charset, "us-ascii");
126355604Sdelphij  else if (! matchkey(p, next, "application"))
127104349Sphk    return;
128104349Sphk  p = getTok(&next);
129355604Sdelphij  if (! p || *p != '/')
130104349Sphk    return;
131104349Sphk  p = getTok(&next);
132355604Sdelphij  /* BEGIN disabled code */
133355604Sdelphij  if (0) {
134355604Sdelphij    if (! matchkey(p, next, "xml") && charset[0] == '\0')
135355604Sdelphij      return;
136355604Sdelphij  }
137355604Sdelphij  /* END disabled code */
138104349Sphk  p = getTok(&next);
139104349Sphk  while (p) {
140104349Sphk    if (*p == ';') {
141104349Sphk      p = getTok(&next);
142104349Sphk      if (matchkey(p, next, "charset")) {
143104349Sphk        p = getTok(&next);
144104349Sphk        if (p && *p == '=') {
145104349Sphk          p = getTok(&next);
146104349Sphk          if (p) {
147104349Sphk            char *s = charset;
148104349Sphk            if (*p == '"') {
149104349Sphk              while (++p != next - 1) {
150104349Sphk                if (*p == '\\')
151104349Sphk                  ++p;
152104349Sphk                if (s == charset + CHARSET_MAX - 1) {
153104349Sphk                  charset[0] = '\0';
154104349Sphk                  break;
155104349Sphk                }
156104349Sphk                *s++ = *p;
157104349Sphk              }
158104349Sphk              *s++ = '\0';
159355604Sdelphij            } else {
160104349Sphk              if (next - p > CHARSET_MAX - 1)
161104349Sphk                break;
162104349Sphk              while (p != next)
163104349Sphk                *s++ = *p++;
164104349Sphk              *s = 0;
165104349Sphk              break;
166104349Sphk            }
167104349Sphk          }
168104349Sphk        }
169104349Sphk        break;
170104349Sphk      }
171355604Sdelphij    } else
172355604Sdelphij      p = getTok(&next);
173104349Sphk  }
174104349Sphk}
175104349Sphk
#ifdef TEST

#  include <stdio.h>

/* Stand-alone test driver, built only when TEST is defined.

   Prints the first command-line argument, then the charset that
   getXMLCharset extracts from it.  Returns 1 when no argument is given,
   0 otherwise. */
int
main(int argc, char *argv[]) {
  char buf[CHARSET_MAX];
  if (argc <= 1)
    return 1;
  printf("%s\n", argv[1]);
  getXMLCharset(argv[1], buf);
  printf("charset=\"%s\"\n", buf);
  return 0;
}

#endif /* TEST */