/* Capacity, in bytes, of a charset output buffer, including the
   terminating NUL: getXMLCharset stores at most CHARSET_MAX - 1
   characters before the terminator, and main sizes its buffer with it. */
#define CHARSET_MAX 41
2
/*
 * Scan the next lexical token starting at *pp and advance *pp.
 *
 * Token kinds:
 *   - atom: a run of ordinary characters.  It ends at whitespace, '(',
 *     ';', '/', '=', '"' or the end of the input; *pp is left ON the
 *     terminating character (it is not consumed).
 *   - quoted string: the returned pointer addresses the opening '"' and
 *     *pp is left just past the closing '"'.
 *   - one of the single characters ';', '/', '=': the returned pointer
 *     addresses that character and *pp is left just past it.
 *
 * Parenthesised comments (which may nest) and whitespace before a token
 * are skipped.  A backslash causes the following character to be skipped
 * without being interpreted.
 *
 * Returns a pointer to the first character of the token, or 0 when no
 * token is produced: the input ends outside an atom (including inside an
 * unterminated string or comment), a backslash is the last character, or
 * a ')' appears outside a comment or string.
 */
static const char *
getTok(const char **pp)
{
  /* Order matters: every value greater than `init` is a comment nesting
     depth, so '(' can enter/deepen a comment with state++ and ')' can
     leave one level with --state. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is still a complete
         token; anything else left open here yields no token. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++; /* init -> inComment, or one comment level deeper */
      break;
    case ')':
      if (state > init)
        --state; /* close one comment level */
      else if (state != inString)
        return 0; /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++; /* the special character is itself the token */
      break;
    case '\\':
      /* Skip the escaped character; the ++*pp after the switch steps
         over it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* inside a comment: quotes carry no meaning */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
69
/*
 * Case-insensitively compare the token [start, end) with the
 * NUL-terminated string key.
 *
 * key is expected to be lowercase ASCII: a letter 'a'..'z' in key
 * matches either itself or its upper-case form in the token; any other
 * key character matches only itself.
 *
 * Returns 1 when the token and key have the same length and every
 * character matches, 0 otherwise.  A null start (no token) never
 * matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key.  Checking this first keeps key from
       being advanced past its terminator. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only letters have an upper-case alternative. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Token exhausted: it matches only if key is exhausted too. */
  return *key == '\0';
}
82
83void
84getXMLCharset(const char *buf, char *charset)
85{
86  const char *next, *p;
87
88  charset[0] = '\0';
89  next = buf;
90  p = getTok(&next);
91  if (matchkey(p, next, "text"))
92    strcpy(charset, "us-ascii");
93  else if (!matchkey(p, next, "application"))
94    return;
95  p = getTok(&next);
96  if (!p || *p != '/')
97    return;
98  p = getTok(&next);
99  if (matchkey(p, next, "xml"))
100    isXml = 1;
101  p = getTok(&next);
102  while (p) {
103    if (*p == ';') {
104      p = getTok(&next);
105      if (matchkey(p, next, "charset")) {
106        p = getTok(&next);
107        if (p && *p == '=') {
108          p = getTok(&next);
109          if (p) {
110            char *s = charset;
111            if (*p == '"') {
112              while (++p != next - 1) {
113                if (*p == '\\')
114                  ++p;
115                if (s == charset + CHARSET_MAX - 1) {
116                  charset[0] = '\0';
117                  break;
118                }
119                *s++ = *p;
120              }
121              *s++ = '\0';
122            }
123            else {
124              if (next - p > CHARSET_MAX - 1)
125                break;
126              while (p != next)
127                *s++ = *p++;
128              *s = 0;
129              break;
130            }
131          }
132        }
133      }
134    }
135  else
136    p = getTok(&next);
137  }
138}
139
140int
141main(int argc, char **argv)
142{
143  char buf[CHARSET_MAX];
144  getXMLCharset(argv[1], buf);
145  printf("charset = \"%s\"\n", buf);
146  return 0;
147}
148