1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3*/
4
5#ifdef COMPILED_FROM_DSP
6#include "winconfig.h"
7#elif defined(MACOS_CLASSIC)
8#include "macconfig.h"
9#else
10#ifdef HAVE_EXPAT_CONFIG_H
11#include <expat_config.h>
12#endif
13#endif /* ndef COMPILED_FROM_DSP */
14
15#include "internal.h"
16#include "xmlrole.h"
17#include "ascii.h"
18
/* The handlers in this file do not check:

 - that ',' and '|' are not mixed within a single model group
 - the content of literals

*/
25
26static const char KW_ANY[] = {
27    ASCII_A, ASCII_N, ASCII_Y, '\0' };
28static const char KW_ATTLIST[] = {
29    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
30static const char KW_CDATA[] = {
31    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
32static const char KW_DOCTYPE[] = {
33    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
34static const char KW_ELEMENT[] = {
35    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
36static const char KW_EMPTY[] = {
37    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
38static const char KW_ENTITIES[] = {
39    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
40    '\0' };
41static const char KW_ENTITY[] = {
42    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
43static const char KW_FIXED[] = {
44    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
45static const char KW_ID[] = {
46    ASCII_I, ASCII_D, '\0' };
47static const char KW_IDREF[] = {
48    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
49static const char KW_IDREFS[] = {
50    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
51static const char KW_IGNORE[] = {
52    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
53static const char KW_IMPLIED[] = {
54    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
55static const char KW_INCLUDE[] = {
56    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
57static const char KW_NDATA[] = {
58    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
59static const char KW_NMTOKEN[] = {
60    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
61static const char KW_NMTOKENS[] = {
62    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
63    '\0' };
64static const char KW_NOTATION[] =
65    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
66      '\0' };
67static const char KW_PCDATA[] = {
68    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
69static const char KW_PUBLIC[] = {
70    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
71static const char KW_REQUIRED[] = {
72    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
73    '\0' };
74static const char KW_SYSTEM[] = {
75    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
76
/* Reads the minBytesPerChar member of an encoding.  Only defined here
   when nothing earlier has already supplied a definition.
*/
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Installs the handler used between declarations.  With XML_DTD the
   choice depends on state->documentEntity: internalSubset when it is
   non-zero, externalSubset1 otherwise.  Without XML_DTD it is always
   internalSubset.  The macro evaluates to the handler that was stored.
*/
#if defined(XML_DTD)
#define setTopLevel(state) \
  ((state)->handler = \
     ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
89
90typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
91                                   int tok,
92                                   const char *ptr,
93                                   const char *end,
94                                   const ENCODING *enc);
95
96static PROLOG_HANDLER
97  prolog0, prolog1, prolog2,
98  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
99  internalSubset,
100  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
101  entity7, entity8, entity9, entity10,
102  notation0, notation1, notation2, notation3, notation4,
103  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
104  attlist7, attlist8, attlist9,
105  element0, element1, element2, element3, element4, element5, element6,
106  element7,
107#ifdef XML_DTD
108  externalSubset0, externalSubset1,
109  condSect0, condSect1, condSect2,
110#endif /* XML_DTD */
111  declClose,
112  error;
113
114static int FASTCALL common(PROLOG_STATE *state, int tok);
115
116static int PTRCALL
117prolog0(PROLOG_STATE *state,
118        int tok,
119        const char *ptr,
120        const char *end,
121        const ENCODING *enc)
122{
123  switch (tok) {
124  case XML_TOK_PROLOG_S:
125    state->handler = prolog1;
126    return XML_ROLE_NONE;
127  case XML_TOK_XML_DECL:
128    state->handler = prolog1;
129    return XML_ROLE_XML_DECL;
130  case XML_TOK_PI:
131    state->handler = prolog1;
132    return XML_ROLE_PI;
133  case XML_TOK_COMMENT:
134    state->handler = prolog1;
135    return XML_ROLE_COMMENT;
136  case XML_TOK_BOM:
137    return XML_ROLE_NONE;
138  case XML_TOK_DECL_OPEN:
139    if (!XmlNameMatchesAscii(enc,
140                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
141                             end,
142                             KW_DOCTYPE))
143      break;
144    state->handler = doctype0;
145    return XML_ROLE_DOCTYPE_NONE;
146  case XML_TOK_INSTANCE_START:
147    state->handler = error;
148    return XML_ROLE_INSTANCE_START;
149  }
150  return common(state, tok);
151}
152
153static int PTRCALL
154prolog1(PROLOG_STATE *state,
155        int tok,
156        const char *ptr,
157        const char *end,
158        const ENCODING *enc)
159{
160  switch (tok) {
161  case XML_TOK_PROLOG_S:
162    return XML_ROLE_NONE;
163  case XML_TOK_PI:
164    return XML_ROLE_PI;
165  case XML_TOK_COMMENT:
166    return XML_ROLE_COMMENT;
167  case XML_TOK_BOM:
168    return XML_ROLE_NONE;
169  case XML_TOK_DECL_OPEN:
170    if (!XmlNameMatchesAscii(enc,
171                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
172                             end,
173                             KW_DOCTYPE))
174      break;
175    state->handler = doctype0;
176    return XML_ROLE_DOCTYPE_NONE;
177  case XML_TOK_INSTANCE_START:
178    state->handler = error;
179    return XML_ROLE_INSTANCE_START;
180  }
181  return common(state, tok);
182}
183
184static int PTRCALL
185prolog2(PROLOG_STATE *state,
186        int tok,
187        const char *ptr,
188        const char *end,
189        const ENCODING *enc)
190{
191  switch (tok) {
192  case XML_TOK_PROLOG_S:
193    return XML_ROLE_NONE;
194  case XML_TOK_PI:
195    return XML_ROLE_PI;
196  case XML_TOK_COMMENT:
197    return XML_ROLE_COMMENT;
198  case XML_TOK_INSTANCE_START:
199    state->handler = error;
200    return XML_ROLE_INSTANCE_START;
201  }
202  return common(state, tok);
203}
204
205static int PTRCALL
206doctype0(PROLOG_STATE *state,
207         int tok,
208         const char *ptr,
209         const char *end,
210         const ENCODING *enc)
211{
212  switch (tok) {
213  case XML_TOK_PROLOG_S:
214    return XML_ROLE_DOCTYPE_NONE;
215  case XML_TOK_NAME:
216  case XML_TOK_PREFIXED_NAME:
217    state->handler = doctype1;
218    return XML_ROLE_DOCTYPE_NAME;
219  }
220  return common(state, tok);
221}
222
223static int PTRCALL
224doctype1(PROLOG_STATE *state,
225         int tok,
226         const char *ptr,
227         const char *end,
228         const ENCODING *enc)
229{
230  switch (tok) {
231  case XML_TOK_PROLOG_S:
232    return XML_ROLE_DOCTYPE_NONE;
233  case XML_TOK_OPEN_BRACKET:
234    state->handler = internalSubset;
235    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
236  case XML_TOK_DECL_CLOSE:
237    state->handler = prolog2;
238    return XML_ROLE_DOCTYPE_CLOSE;
239  case XML_TOK_NAME:
240    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
241      state->handler = doctype3;
242      return XML_ROLE_DOCTYPE_NONE;
243    }
244    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
245      state->handler = doctype2;
246      return XML_ROLE_DOCTYPE_NONE;
247    }
248    break;
249  }
250  return common(state, tok);
251}
252
253static int PTRCALL
254doctype2(PROLOG_STATE *state,
255         int tok,
256         const char *ptr,
257         const char *end,
258         const ENCODING *enc)
259{
260  switch (tok) {
261  case XML_TOK_PROLOG_S:
262    return XML_ROLE_DOCTYPE_NONE;
263  case XML_TOK_LITERAL:
264    state->handler = doctype3;
265    return XML_ROLE_DOCTYPE_PUBLIC_ID;
266  }
267  return common(state, tok);
268}
269
270static int PTRCALL
271doctype3(PROLOG_STATE *state,
272         int tok,
273         const char *ptr,
274         const char *end,
275         const ENCODING *enc)
276{
277  switch (tok) {
278  case XML_TOK_PROLOG_S:
279    return XML_ROLE_DOCTYPE_NONE;
280  case XML_TOK_LITERAL:
281    state->handler = doctype4;
282    return XML_ROLE_DOCTYPE_SYSTEM_ID;
283  }
284  return common(state, tok);
285}
286
287static int PTRCALL
288doctype4(PROLOG_STATE *state,
289         int tok,
290         const char *ptr,
291         const char *end,
292         const ENCODING *enc)
293{
294  switch (tok) {
295  case XML_TOK_PROLOG_S:
296    return XML_ROLE_DOCTYPE_NONE;
297  case XML_TOK_OPEN_BRACKET:
298    state->handler = internalSubset;
299    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
300  case XML_TOK_DECL_CLOSE:
301    state->handler = prolog2;
302    return XML_ROLE_DOCTYPE_CLOSE;
303  }
304  return common(state, tok);
305}
306
307static int PTRCALL
308doctype5(PROLOG_STATE *state,
309         int tok,
310         const char *ptr,
311         const char *end,
312         const ENCODING *enc)
313{
314  switch (tok) {
315  case XML_TOK_PROLOG_S:
316    return XML_ROLE_DOCTYPE_NONE;
317  case XML_TOK_DECL_CLOSE:
318    state->handler = prolog2;
319    return XML_ROLE_DOCTYPE_CLOSE;
320  }
321  return common(state, tok);
322}
323
324static int PTRCALL
325internalSubset(PROLOG_STATE *state,
326               int tok,
327               const char *ptr,
328               const char *end,
329               const ENCODING *enc)
330{
331  switch (tok) {
332  case XML_TOK_PROLOG_S:
333    return XML_ROLE_NONE;
334  case XML_TOK_DECL_OPEN:
335    if (XmlNameMatchesAscii(enc,
336                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
337                            end,
338                            KW_ENTITY)) {
339      state->handler = entity0;
340      return XML_ROLE_ENTITY_NONE;
341    }
342    if (XmlNameMatchesAscii(enc,
343                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
344                            end,
345                            KW_ATTLIST)) {
346      state->handler = attlist0;
347      return XML_ROLE_ATTLIST_NONE;
348    }
349    if (XmlNameMatchesAscii(enc,
350                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
351                            end,
352                            KW_ELEMENT)) {
353      state->handler = element0;
354      return XML_ROLE_ELEMENT_NONE;
355    }
356    if (XmlNameMatchesAscii(enc,
357                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
358                            end,
359                            KW_NOTATION)) {
360      state->handler = notation0;
361      return XML_ROLE_NOTATION_NONE;
362    }
363    break;
364  case XML_TOK_PI:
365    return XML_ROLE_PI;
366  case XML_TOK_COMMENT:
367    return XML_ROLE_COMMENT;
368  case XML_TOK_PARAM_ENTITY_REF:
369    return XML_ROLE_PARAM_ENTITY_REF;
370  case XML_TOK_CLOSE_BRACKET:
371    state->handler = doctype5;
372    return XML_ROLE_DOCTYPE_NONE;
373  }
374  return common(state, tok);
375}
376
377#ifdef XML_DTD
378
379static int PTRCALL
380externalSubset0(PROLOG_STATE *state,
381                int tok,
382                const char *ptr,
383                const char *end,
384                const ENCODING *enc)
385{
386  state->handler = externalSubset1;
387  if (tok == XML_TOK_XML_DECL)
388    return XML_ROLE_TEXT_DECL;
389  return externalSubset1(state, tok, ptr, end, enc);
390}
391
392static int PTRCALL
393externalSubset1(PROLOG_STATE *state,
394                int tok,
395                const char *ptr,
396                const char *end,
397                const ENCODING *enc)
398{
399  switch (tok) {
400  case XML_TOK_COND_SECT_OPEN:
401    state->handler = condSect0;
402    return XML_ROLE_NONE;
403  case XML_TOK_COND_SECT_CLOSE:
404    if (state->includeLevel == 0)
405      break;
406    state->includeLevel -= 1;
407    return XML_ROLE_NONE;
408  case XML_TOK_PROLOG_S:
409    return XML_ROLE_NONE;
410  case XML_TOK_CLOSE_BRACKET:
411    break;
412  case XML_TOK_NONE:
413    if (state->includeLevel)
414      break;
415    return XML_ROLE_NONE;
416  default:
417    return internalSubset(state, tok, ptr, end, enc);
418  }
419  return common(state, tok);
420}
421
422#endif /* XML_DTD */
423
424static int PTRCALL
425entity0(PROLOG_STATE *state,
426        int tok,
427        const char *ptr,
428        const char *end,
429        const ENCODING *enc)
430{
431  switch (tok) {
432  case XML_TOK_PROLOG_S:
433    return XML_ROLE_ENTITY_NONE;
434  case XML_TOK_PERCENT:
435    state->handler = entity1;
436    return XML_ROLE_ENTITY_NONE;
437  case XML_TOK_NAME:
438    state->handler = entity2;
439    return XML_ROLE_GENERAL_ENTITY_NAME;
440  }
441  return common(state, tok);
442}
443
444static int PTRCALL
445entity1(PROLOG_STATE *state,
446        int tok,
447        const char *ptr,
448        const char *end,
449        const ENCODING *enc)
450{
451  switch (tok) {
452  case XML_TOK_PROLOG_S:
453    return XML_ROLE_ENTITY_NONE;
454  case XML_TOK_NAME:
455    state->handler = entity7;
456    return XML_ROLE_PARAM_ENTITY_NAME;
457  }
458  return common(state, tok);
459}
460
461static int PTRCALL
462entity2(PROLOG_STATE *state,
463        int tok,
464        const char *ptr,
465        const char *end,
466        const ENCODING *enc)
467{
468  switch (tok) {
469  case XML_TOK_PROLOG_S:
470    return XML_ROLE_ENTITY_NONE;
471  case XML_TOK_NAME:
472    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
473      state->handler = entity4;
474      return XML_ROLE_ENTITY_NONE;
475    }
476    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
477      state->handler = entity3;
478      return XML_ROLE_ENTITY_NONE;
479    }
480    break;
481  case XML_TOK_LITERAL:
482    state->handler = declClose;
483    state->role_none = XML_ROLE_ENTITY_NONE;
484    return XML_ROLE_ENTITY_VALUE;
485  }
486  return common(state, tok);
487}
488
489static int PTRCALL
490entity3(PROLOG_STATE *state,
491        int tok,
492        const char *ptr,
493        const char *end,
494        const ENCODING *enc)
495{
496  switch (tok) {
497  case XML_TOK_PROLOG_S:
498    return XML_ROLE_ENTITY_NONE;
499  case XML_TOK_LITERAL:
500    state->handler = entity4;
501    return XML_ROLE_ENTITY_PUBLIC_ID;
502  }
503  return common(state, tok);
504}
505
506static int PTRCALL
507entity4(PROLOG_STATE *state,
508        int tok,
509        const char *ptr,
510        const char *end,
511        const ENCODING *enc)
512{
513  switch (tok) {
514  case XML_TOK_PROLOG_S:
515    return XML_ROLE_ENTITY_NONE;
516  case XML_TOK_LITERAL:
517    state->handler = entity5;
518    return XML_ROLE_ENTITY_SYSTEM_ID;
519  }
520  return common(state, tok);
521}
522
523static int PTRCALL
524entity5(PROLOG_STATE *state,
525        int tok,
526        const char *ptr,
527        const char *end,
528        const ENCODING *enc)
529{
530  switch (tok) {
531  case XML_TOK_PROLOG_S:
532    return XML_ROLE_ENTITY_NONE;
533  case XML_TOK_DECL_CLOSE:
534    setTopLevel(state);
535    return XML_ROLE_ENTITY_COMPLETE;
536  case XML_TOK_NAME:
537    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
538      state->handler = entity6;
539      return XML_ROLE_ENTITY_NONE;
540    }
541    break;
542  }
543  return common(state, tok);
544}
545
546static int PTRCALL
547entity6(PROLOG_STATE *state,
548        int tok,
549        const char *ptr,
550        const char *end,
551        const ENCODING *enc)
552{
553  switch (tok) {
554  case XML_TOK_PROLOG_S:
555    return XML_ROLE_ENTITY_NONE;
556  case XML_TOK_NAME:
557    state->handler = declClose;
558    state->role_none = XML_ROLE_ENTITY_NONE;
559    return XML_ROLE_ENTITY_NOTATION_NAME;
560  }
561  return common(state, tok);
562}
563
564static int PTRCALL
565entity7(PROLOG_STATE *state,
566        int tok,
567        const char *ptr,
568        const char *end,
569        const ENCODING *enc)
570{
571  switch (tok) {
572  case XML_TOK_PROLOG_S:
573    return XML_ROLE_ENTITY_NONE;
574  case XML_TOK_NAME:
575    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
576      state->handler = entity9;
577      return XML_ROLE_ENTITY_NONE;
578    }
579    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
580      state->handler = entity8;
581      return XML_ROLE_ENTITY_NONE;
582    }
583    break;
584  case XML_TOK_LITERAL:
585    state->handler = declClose;
586    state->role_none = XML_ROLE_ENTITY_NONE;
587    return XML_ROLE_ENTITY_VALUE;
588  }
589  return common(state, tok);
590}
591
592static int PTRCALL
593entity8(PROLOG_STATE *state,
594        int tok,
595        const char *ptr,
596        const char *end,
597        const ENCODING *enc)
598{
599  switch (tok) {
600  case XML_TOK_PROLOG_S:
601    return XML_ROLE_ENTITY_NONE;
602  case XML_TOK_LITERAL:
603    state->handler = entity9;
604    return XML_ROLE_ENTITY_PUBLIC_ID;
605  }
606  return common(state, tok);
607}
608
609static int PTRCALL
610entity9(PROLOG_STATE *state,
611        int tok,
612        const char *ptr,
613        const char *end,
614        const ENCODING *enc)
615{
616  switch (tok) {
617  case XML_TOK_PROLOG_S:
618    return XML_ROLE_ENTITY_NONE;
619  case XML_TOK_LITERAL:
620    state->handler = entity10;
621    return XML_ROLE_ENTITY_SYSTEM_ID;
622  }
623  return common(state, tok);
624}
625
626static int PTRCALL
627entity10(PROLOG_STATE *state,
628         int tok,
629         const char *ptr,
630         const char *end,
631         const ENCODING *enc)
632{
633  switch (tok) {
634  case XML_TOK_PROLOG_S:
635    return XML_ROLE_ENTITY_NONE;
636  case XML_TOK_DECL_CLOSE:
637    setTopLevel(state);
638    return XML_ROLE_ENTITY_COMPLETE;
639  }
640  return common(state, tok);
641}
642
643static int PTRCALL
644notation0(PROLOG_STATE *state,
645          int tok,
646          const char *ptr,
647          const char *end,
648          const ENCODING *enc)
649{
650  switch (tok) {
651  case XML_TOK_PROLOG_S:
652    return XML_ROLE_NOTATION_NONE;
653  case XML_TOK_NAME:
654    state->handler = notation1;
655    return XML_ROLE_NOTATION_NAME;
656  }
657  return common(state, tok);
658}
659
660static int PTRCALL
661notation1(PROLOG_STATE *state,
662          int tok,
663          const char *ptr,
664          const char *end,
665          const ENCODING *enc)
666{
667  switch (tok) {
668  case XML_TOK_PROLOG_S:
669    return XML_ROLE_NOTATION_NONE;
670  case XML_TOK_NAME:
671    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
672      state->handler = notation3;
673      return XML_ROLE_NOTATION_NONE;
674    }
675    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
676      state->handler = notation2;
677      return XML_ROLE_NOTATION_NONE;
678    }
679    break;
680  }
681  return common(state, tok);
682}
683
684static int PTRCALL
685notation2(PROLOG_STATE *state,
686          int tok,
687          const char *ptr,
688          const char *end,
689          const ENCODING *enc)
690{
691  switch (tok) {
692  case XML_TOK_PROLOG_S:
693    return XML_ROLE_NOTATION_NONE;
694  case XML_TOK_LITERAL:
695    state->handler = notation4;
696    return XML_ROLE_NOTATION_PUBLIC_ID;
697  }
698  return common(state, tok);
699}
700
701static int PTRCALL
702notation3(PROLOG_STATE *state,
703          int tok,
704          const char *ptr,
705          const char *end,
706          const ENCODING *enc)
707{
708  switch (tok) {
709  case XML_TOK_PROLOG_S:
710    return XML_ROLE_NOTATION_NONE;
711  case XML_TOK_LITERAL:
712    state->handler = declClose;
713    state->role_none = XML_ROLE_NOTATION_NONE;
714    return XML_ROLE_NOTATION_SYSTEM_ID;
715  }
716  return common(state, tok);
717}
718
719static int PTRCALL
720notation4(PROLOG_STATE *state,
721          int tok,
722          const char *ptr,
723          const char *end,
724          const ENCODING *enc)
725{
726  switch (tok) {
727  case XML_TOK_PROLOG_S:
728    return XML_ROLE_NOTATION_NONE;
729  case XML_TOK_LITERAL:
730    state->handler = declClose;
731    state->role_none = XML_ROLE_NOTATION_NONE;
732    return XML_ROLE_NOTATION_SYSTEM_ID;
733  case XML_TOK_DECL_CLOSE:
734    setTopLevel(state);
735    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
736  }
737  return common(state, tok);
738}
739
740static int PTRCALL
741attlist0(PROLOG_STATE *state,
742         int tok,
743         const char *ptr,
744         const char *end,
745         const ENCODING *enc)
746{
747  switch (tok) {
748  case XML_TOK_PROLOG_S:
749    return XML_ROLE_ATTLIST_NONE;
750  case XML_TOK_NAME:
751  case XML_TOK_PREFIXED_NAME:
752    state->handler = attlist1;
753    return XML_ROLE_ATTLIST_ELEMENT_NAME;
754  }
755  return common(state, tok);
756}
757
758static int PTRCALL
759attlist1(PROLOG_STATE *state,
760         int tok,
761         const char *ptr,
762         const char *end,
763         const ENCODING *enc)
764{
765  switch (tok) {
766  case XML_TOK_PROLOG_S:
767    return XML_ROLE_ATTLIST_NONE;
768  case XML_TOK_DECL_CLOSE:
769    setTopLevel(state);
770    return XML_ROLE_ATTLIST_NONE;
771  case XML_TOK_NAME:
772  case XML_TOK_PREFIXED_NAME:
773    state->handler = attlist2;
774    return XML_ROLE_ATTRIBUTE_NAME;
775  }
776  return common(state, tok);
777}
778
779static int PTRCALL
780attlist2(PROLOG_STATE *state,
781         int tok,
782         const char *ptr,
783         const char *end,
784         const ENCODING *enc)
785{
786  switch (tok) {
787  case XML_TOK_PROLOG_S:
788    return XML_ROLE_ATTLIST_NONE;
789  case XML_TOK_NAME:
790    {
791      static const char *types[] = {
792        KW_CDATA,
793        KW_ID,
794        KW_IDREF,
795        KW_IDREFS,
796        KW_ENTITY,
797        KW_ENTITIES,
798        KW_NMTOKEN,
799        KW_NMTOKENS,
800      };
801      int i;
802      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
803        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
804          state->handler = attlist8;
805          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
806        }
807    }
808    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
809      state->handler = attlist5;
810      return XML_ROLE_ATTLIST_NONE;
811    }
812    break;
813  case XML_TOK_OPEN_PAREN:
814    state->handler = attlist3;
815    return XML_ROLE_ATTLIST_NONE;
816  }
817  return common(state, tok);
818}
819
820static int PTRCALL
821attlist3(PROLOG_STATE *state,
822         int tok,
823         const char *ptr,
824         const char *end,
825         const ENCODING *enc)
826{
827  switch (tok) {
828  case XML_TOK_PROLOG_S:
829    return XML_ROLE_ATTLIST_NONE;
830  case XML_TOK_NMTOKEN:
831  case XML_TOK_NAME:
832  case XML_TOK_PREFIXED_NAME:
833    state->handler = attlist4;
834    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
835  }
836  return common(state, tok);
837}
838
839static int PTRCALL
840attlist4(PROLOG_STATE *state,
841         int tok,
842         const char *ptr,
843         const char *end,
844         const ENCODING *enc)
845{
846  switch (tok) {
847  case XML_TOK_PROLOG_S:
848    return XML_ROLE_ATTLIST_NONE;
849  case XML_TOK_CLOSE_PAREN:
850    state->handler = attlist8;
851    return XML_ROLE_ATTLIST_NONE;
852  case XML_TOK_OR:
853    state->handler = attlist3;
854    return XML_ROLE_ATTLIST_NONE;
855  }
856  return common(state, tok);
857}
858
859static int PTRCALL
860attlist5(PROLOG_STATE *state,
861         int tok,
862         const char *ptr,
863         const char *end,
864         const ENCODING *enc)
865{
866  switch (tok) {
867  case XML_TOK_PROLOG_S:
868    return XML_ROLE_ATTLIST_NONE;
869  case XML_TOK_OPEN_PAREN:
870    state->handler = attlist6;
871    return XML_ROLE_ATTLIST_NONE;
872  }
873  return common(state, tok);
874}
875
876static int PTRCALL
877attlist6(PROLOG_STATE *state,
878         int tok,
879         const char *ptr,
880         const char *end,
881         const ENCODING *enc)
882{
883  switch (tok) {
884  case XML_TOK_PROLOG_S:
885    return XML_ROLE_ATTLIST_NONE;
886  case XML_TOK_NAME:
887    state->handler = attlist7;
888    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
889  }
890  return common(state, tok);
891}
892
893static int PTRCALL
894attlist7(PROLOG_STATE *state,
895         int tok,
896         const char *ptr,
897         const char *end,
898         const ENCODING *enc)
899{
900  switch (tok) {
901  case XML_TOK_PROLOG_S:
902    return XML_ROLE_ATTLIST_NONE;
903  case XML_TOK_CLOSE_PAREN:
904    state->handler = attlist8;
905    return XML_ROLE_ATTLIST_NONE;
906  case XML_TOK_OR:
907    state->handler = attlist6;
908    return XML_ROLE_ATTLIST_NONE;
909  }
910  return common(state, tok);
911}
912
913/* default value */
914static int PTRCALL
915attlist8(PROLOG_STATE *state,
916         int tok,
917         const char *ptr,
918         const char *end,
919         const ENCODING *enc)
920{
921  switch (tok) {
922  case XML_TOK_PROLOG_S:
923    return XML_ROLE_ATTLIST_NONE;
924  case XML_TOK_POUND_NAME:
925    if (XmlNameMatchesAscii(enc,
926                            ptr + MIN_BYTES_PER_CHAR(enc),
927                            end,
928                            KW_IMPLIED)) {
929      state->handler = attlist1;
930      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
931    }
932    if (XmlNameMatchesAscii(enc,
933                            ptr + MIN_BYTES_PER_CHAR(enc),
934                            end,
935                            KW_REQUIRED)) {
936      state->handler = attlist1;
937      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
938    }
939    if (XmlNameMatchesAscii(enc,
940                            ptr + MIN_BYTES_PER_CHAR(enc),
941                            end,
942                            KW_FIXED)) {
943      state->handler = attlist9;
944      return XML_ROLE_ATTLIST_NONE;
945    }
946    break;
947  case XML_TOK_LITERAL:
948    state->handler = attlist1;
949    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
950  }
951  return common(state, tok);
952}
953
954static int PTRCALL
955attlist9(PROLOG_STATE *state,
956         int tok,
957         const char *ptr,
958         const char *end,
959         const ENCODING *enc)
960{
961  switch (tok) {
962  case XML_TOK_PROLOG_S:
963    return XML_ROLE_ATTLIST_NONE;
964  case XML_TOK_LITERAL:
965    state->handler = attlist1;
966    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
967  }
968  return common(state, tok);
969}
970
971static int PTRCALL
972element0(PROLOG_STATE *state,
973         int tok,
974         const char *ptr,
975         const char *end,
976         const ENCODING *enc)
977{
978  switch (tok) {
979  case XML_TOK_PROLOG_S:
980    return XML_ROLE_ELEMENT_NONE;
981  case XML_TOK_NAME:
982  case XML_TOK_PREFIXED_NAME:
983    state->handler = element1;
984    return XML_ROLE_ELEMENT_NAME;
985  }
986  return common(state, tok);
987}
988
989static int PTRCALL
990element1(PROLOG_STATE *state,
991         int tok,
992         const char *ptr,
993         const char *end,
994         const ENCODING *enc)
995{
996  switch (tok) {
997  case XML_TOK_PROLOG_S:
998    return XML_ROLE_ELEMENT_NONE;
999  case XML_TOK_NAME:
1000    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1001      state->handler = declClose;
1002      state->role_none = XML_ROLE_ELEMENT_NONE;
1003      return XML_ROLE_CONTENT_EMPTY;
1004    }
1005    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1006      state->handler = declClose;
1007      state->role_none = XML_ROLE_ELEMENT_NONE;
1008      return XML_ROLE_CONTENT_ANY;
1009    }
1010    break;
1011  case XML_TOK_OPEN_PAREN:
1012    state->handler = element2;
1013    state->level = 1;
1014    return XML_ROLE_GROUP_OPEN;
1015  }
1016  return common(state, tok);
1017}
1018
1019static int PTRCALL
1020element2(PROLOG_STATE *state,
1021         int tok,
1022         const char *ptr,
1023         const char *end,
1024         const ENCODING *enc)
1025{
1026  switch (tok) {
1027  case XML_TOK_PROLOG_S:
1028    return XML_ROLE_ELEMENT_NONE;
1029  case XML_TOK_POUND_NAME:
1030    if (XmlNameMatchesAscii(enc,
1031                            ptr + MIN_BYTES_PER_CHAR(enc),
1032                            end,
1033                            KW_PCDATA)) {
1034      state->handler = element3;
1035      return XML_ROLE_CONTENT_PCDATA;
1036    }
1037    break;
1038  case XML_TOK_OPEN_PAREN:
1039    state->level = 2;
1040    state->handler = element6;
1041    return XML_ROLE_GROUP_OPEN;
1042  case XML_TOK_NAME:
1043  case XML_TOK_PREFIXED_NAME:
1044    state->handler = element7;
1045    return XML_ROLE_CONTENT_ELEMENT;
1046  case XML_TOK_NAME_QUESTION:
1047    state->handler = element7;
1048    return XML_ROLE_CONTENT_ELEMENT_OPT;
1049  case XML_TOK_NAME_ASTERISK:
1050    state->handler = element7;
1051    return XML_ROLE_CONTENT_ELEMENT_REP;
1052  case XML_TOK_NAME_PLUS:
1053    state->handler = element7;
1054    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1055  }
1056  return common(state, tok);
1057}
1058
1059static int PTRCALL
1060element3(PROLOG_STATE *state,
1061         int tok,
1062         const char *ptr,
1063         const char *end,
1064         const ENCODING *enc)
1065{
1066  switch (tok) {
1067  case XML_TOK_PROLOG_S:
1068    return XML_ROLE_ELEMENT_NONE;
1069  case XML_TOK_CLOSE_PAREN:
1070    state->handler = declClose;
1071    state->role_none = XML_ROLE_ELEMENT_NONE;
1072    return XML_ROLE_GROUP_CLOSE;
1073  case XML_TOK_CLOSE_PAREN_ASTERISK:
1074    state->handler = declClose;
1075    state->role_none = XML_ROLE_ELEMENT_NONE;
1076    return XML_ROLE_GROUP_CLOSE_REP;
1077  case XML_TOK_OR:
1078    state->handler = element4;
1079    return XML_ROLE_ELEMENT_NONE;
1080  }
1081  return common(state, tok);
1082}
1083
1084static int PTRCALL
1085element4(PROLOG_STATE *state,
1086         int tok,
1087         const char *ptr,
1088         const char *end,
1089         const ENCODING *enc)
1090{
1091  switch (tok) {
1092  case XML_TOK_PROLOG_S:
1093    return XML_ROLE_ELEMENT_NONE;
1094  case XML_TOK_NAME:
1095  case XML_TOK_PREFIXED_NAME:
1096    state->handler = element5;
1097    return XML_ROLE_CONTENT_ELEMENT;
1098  }
1099  return common(state, tok);
1100}
1101
1102static int PTRCALL
1103element5(PROLOG_STATE *state,
1104         int tok,
1105         const char *ptr,
1106         const char *end,
1107         const ENCODING *enc)
1108{
1109  switch (tok) {
1110  case XML_TOK_PROLOG_S:
1111    return XML_ROLE_ELEMENT_NONE;
1112  case XML_TOK_CLOSE_PAREN_ASTERISK:
1113    state->handler = declClose;
1114    state->role_none = XML_ROLE_ELEMENT_NONE;
1115    return XML_ROLE_GROUP_CLOSE_REP;
1116  case XML_TOK_OR:
1117    state->handler = element4;
1118    return XML_ROLE_ELEMENT_NONE;
1119  }
1120  return common(state, tok);
1121}
1122
1123static int PTRCALL
1124element6(PROLOG_STATE *state,
1125         int tok,
1126         const char *ptr,
1127         const char *end,
1128         const ENCODING *enc)
1129{
1130  switch (tok) {
1131  case XML_TOK_PROLOG_S:
1132    return XML_ROLE_ELEMENT_NONE;
1133  case XML_TOK_OPEN_PAREN:
1134    state->level += 1;
1135    return XML_ROLE_GROUP_OPEN;
1136  case XML_TOK_NAME:
1137  case XML_TOK_PREFIXED_NAME:
1138    state->handler = element7;
1139    return XML_ROLE_CONTENT_ELEMENT;
1140  case XML_TOK_NAME_QUESTION:
1141    state->handler = element7;
1142    return XML_ROLE_CONTENT_ELEMENT_OPT;
1143  case XML_TOK_NAME_ASTERISK:
1144    state->handler = element7;
1145    return XML_ROLE_CONTENT_ELEMENT_REP;
1146  case XML_TOK_NAME_PLUS:
1147    state->handler = element7;
1148    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1149  }
1150  return common(state, tok);
1151}
1152
1153static int PTRCALL
1154element7(PROLOG_STATE *state,
1155         int tok,
1156         const char *ptr,
1157         const char *end,
1158         const ENCODING *enc)
1159{
1160  switch (tok) {
1161  case XML_TOK_PROLOG_S:
1162    return XML_ROLE_ELEMENT_NONE;
1163  case XML_TOK_CLOSE_PAREN:
1164    state->level -= 1;
1165    if (state->level == 0) {
1166      state->handler = declClose;
1167      state->role_none = XML_ROLE_ELEMENT_NONE;
1168    }
1169    return XML_ROLE_GROUP_CLOSE;
1170  case XML_TOK_CLOSE_PAREN_ASTERISK:
1171    state->level -= 1;
1172    if (state->level == 0) {
1173      state->handler = declClose;
1174      state->role_none = XML_ROLE_ELEMENT_NONE;
1175    }
1176    return XML_ROLE_GROUP_CLOSE_REP;
1177  case XML_TOK_CLOSE_PAREN_QUESTION:
1178    state->level -= 1;
1179    if (state->level == 0) {
1180      state->handler = declClose;
1181      state->role_none = XML_ROLE_ELEMENT_NONE;
1182    }
1183    return XML_ROLE_GROUP_CLOSE_OPT;
1184  case XML_TOK_CLOSE_PAREN_PLUS:
1185    state->level -= 1;
1186    if (state->level == 0) {
1187      state->handler = declClose;
1188      state->role_none = XML_ROLE_ELEMENT_NONE;
1189    }
1190    return XML_ROLE_GROUP_CLOSE_PLUS;
1191  case XML_TOK_COMMA:
1192    state->handler = element6;
1193    return XML_ROLE_GROUP_SEQUENCE;
1194  case XML_TOK_OR:
1195    state->handler = element6;
1196    return XML_ROLE_GROUP_CHOICE;
1197  }
1198  return common(state, tok);
1199}
1200
1201#ifdef XML_DTD
1202
1203static int PTRCALL
1204condSect0(PROLOG_STATE *state,
1205          int tok,
1206          const char *ptr,
1207          const char *end,
1208          const ENCODING *enc)
1209{
1210  switch (tok) {
1211  case XML_TOK_PROLOG_S:
1212    return XML_ROLE_NONE;
1213  case XML_TOK_NAME:
1214    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1215      state->handler = condSect1;
1216      return XML_ROLE_NONE;
1217    }
1218    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1219      state->handler = condSect2;
1220      return XML_ROLE_NONE;
1221    }
1222    break;
1223  }
1224  return common(state, tok);
1225}
1226
1227static int PTRCALL
1228condSect1(PROLOG_STATE *state,
1229          int tok,
1230          const char *ptr,
1231          const char *end,
1232          const ENCODING *enc)
1233{
1234  switch (tok) {
1235  case XML_TOK_PROLOG_S:
1236    return XML_ROLE_NONE;
1237  case XML_TOK_OPEN_BRACKET:
1238    state->handler = externalSubset1;
1239    state->includeLevel += 1;
1240    return XML_ROLE_NONE;
1241  }
1242  return common(state, tok);
1243}
1244
1245static int PTRCALL
1246condSect2(PROLOG_STATE *state,
1247          int tok,
1248          const char *ptr,
1249          const char *end,
1250          const ENCODING *enc)
1251{
1252  switch (tok) {
1253  case XML_TOK_PROLOG_S:
1254    return XML_ROLE_NONE;
1255  case XML_TOK_OPEN_BRACKET:
1256    state->handler = externalSubset1;
1257    return XML_ROLE_IGNORE_SECT;
1258  }
1259  return common(state, tok);
1260}
1261
1262#endif /* XML_DTD */
1263
1264static int PTRCALL
1265declClose(PROLOG_STATE *state,
1266          int tok,
1267          const char *ptr,
1268          const char *end,
1269          const ENCODING *enc)
1270{
1271  switch (tok) {
1272  case XML_TOK_PROLOG_S:
1273    return state->role_none;
1274  case XML_TOK_DECL_CLOSE:
1275    setTopLevel(state);
1276    return state->role_none;
1277  }
1278  return common(state, tok);
1279}
1280
1281static int PTRCALL
1282error(PROLOG_STATE *state,
1283      int tok,
1284      const char *ptr,
1285      const char *end,
1286      const ENCODING *enc)
1287{
1288  return XML_ROLE_NONE;
1289}
1290
1291static int FASTCALL
1292common(PROLOG_STATE *state, int tok)
1293{
1294#ifdef XML_DTD
1295  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1296    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1297#endif
1298  state->handler = error;
1299  return XML_ROLE_ERROR;
1300}
1301
1302void
1303XmlPrologStateInit(PROLOG_STATE *state)
1304{
1305  state->handler = prolog0;
1306#ifdef XML_DTD
1307  state->documentEntity = 1;
1308  state->includeLevel = 0;
1309  state->inEntityValue = 0;
1310#endif /* XML_DTD */
1311}
1312
1313#ifdef XML_DTD
1314
1315void
1316XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1317{
1318  state->handler = externalSubset0;
1319  state->documentEntity = 0;
1320  state->includeLevel = 0;
1321}
1322
1323#endif /* XML_DTD */
1324