/* ct.c revision 104349 */
/*
 * Size in bytes of a charset name buffer, including the terminating NUL.
 * getXMLCharset() stores at most CHARSET_MAX - 1 characters plus the NUL,
 * and main() sizes its output buffer with this constant.
 */
#define CHARSET_MAX 41
#include <stdio.h>
#include <string.h>

/*
 * Scan the next lexical token of a NUL-terminated header-style string.
 *
 * pp   in/out scan position; advanced as characters are consumed.
 *
 * Returns a pointer to the first character of the token, or 0 when no
 * token can be produced.  On a successful return *pp is one past the
 * last character of the token.
 *
 * Token kinds:
 *   - one of ';', '/', '=' on its own (single character);
 *   - a quoted string: the result points at the opening '"' and *pp is
 *     just past the closing '"';
 *   - an atom: a run of other characters, ended by whitespace, '(',
 *     ';', '/', '=', '"' or the end of the string.
 *
 * Whitespace between tokens and parenthesised comments (which may nest)
 * are skipped.  A backslash consumes the following character without
 * interpreting it; outside of any token the pair is skipped without
 * starting an atom.
 *
 * 0 is returned at end of input when no atom is in progress (this
 * includes an unterminated quoted string or comment), for a backslash
 * that is the last character, and for a ')' met outside a comment and
 * outside a quoted string (even in the middle of an atom).
 */
static const char *
getTok(const char **pp)
{
  /*
   * The numeric order matters: every value above `init` means "inside a
   * comment", and each further '(' bumps the value by one so that the
   * matching ')' can step back down through the nesting levels.
   */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may be terminated by the end of the string; do not
         drop it (e.g. the value in "charset=utf-8"). */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;              /* enter a comment, or nest one level deeper */
      break;
    case ')':
      if (state > init)
        --state;              /* leave one comment level */
      else if (state != inString)
        return 0;             /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;       /* single-character token */
      break;
    case '\\':
      /* Skip the backslash here; the shared increment at the bottom of
         the loop then skips the escaped character. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;      /* quote left in place for the next call */
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
69104349Sphk
/*
 * Compare the token [start, end) with `key`, ignoring ASCII case.
 *
 * start, end  bounds of the token; start may be 0 (no token), in which
 *             case the result is 0.
 * key         NUL-terminated keyword; must be lowercase ASCII.
 *
 * Returns 1 when the token has exactly the length of `key` and every
 * character equals the key character or its upper-case form, else 0.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;

    /* Token is longer than the key: stop here instead of comparing
       against (and then stepping past) the key's terminator. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* The upper-case alternative is only meaningful for a-z. */
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  return *key == '\0';
}
82104349Sphk
83104349Sphkvoid
84104349SphkgetXMLCharset(const char *buf, char *charset)
85104349Sphk{
86104349Sphk  const char *next, *p;
87104349Sphk
88104349Sphk  charset[0] = '\0';
89104349Sphk  next = buf;
90104349Sphk  p = getTok(&next);
91104349Sphk  if (matchkey(p, next, "text"))
92104349Sphk    strcpy(charset, "us-ascii");
93104349Sphk  else if (!matchkey(p, next, "application"))
94104349Sphk    return;
95104349Sphk  p = getTok(&next);
96104349Sphk  if (!p || *p != '/')
97104349Sphk    return;
98104349Sphk  p = getTok(&next);
99104349Sphk  if (matchkey(p, next, "xml"))
100104349Sphk    isXml = 1;
101104349Sphk  p = getTok(&next);
102104349Sphk  while (p) {
103104349Sphk    if (*p == ';') {
104104349Sphk      p = getTok(&next);
105104349Sphk      if (matchkey(p, next, "charset")) {
106104349Sphk        p = getTok(&next);
107104349Sphk        if (p && *p == '=') {
108104349Sphk          p = getTok(&next);
109104349Sphk          if (p) {
110104349Sphk            char *s = charset;
111104349Sphk            if (*p == '"') {
112104349Sphk              while (++p != next - 1) {
113104349Sphk                if (*p == '\\')
114104349Sphk                  ++p;
115104349Sphk                if (s == charset + CHARSET_MAX - 1) {
116104349Sphk                  charset[0] = '\0';
117104349Sphk                  break;
118104349Sphk                }
119104349Sphk                *s++ = *p;
120104349Sphk              }
121104349Sphk              *s++ = '\0';
122104349Sphk            }
123104349Sphk            else {
124104349Sphk              if (next - p > CHARSET_MAX - 1)
125104349Sphk                break;
126104349Sphk              while (p != next)
127104349Sphk                *s++ = *p++;
128104349Sphk              *s = 0;
129104349Sphk              break;
130104349Sphk            }
131104349Sphk          }
132104349Sphk        }
133104349Sphk      }
134104349Sphk    }
135104349Sphk  else
136104349Sphk    p = getTok(&next);
137104349Sphk  }
138104349Sphk}
139104349Sphk
140104349Sphkint
141104349Sphkmain(int argc, char **argv)
142104349Sphk{
143104349Sphk  char buf[CHARSET_MAX];
144104349Sphk  getXMLCharset(argv[1], buf);
145104349Sphk  printf("charset = \"%s\"\n", buf);
146104349Sphk  return 0;
147104349Sphk}
148