xmlrole.c revision 104349
1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3*/
4
5#ifdef COMPILED_FROM_DSP
6#include "winconfig.h"
7#elif defined(MACOS_CLASSIC)
8#include "macconfig.h"
9#else
10#include <expat_config.h>
11#endif /* ndef COMPILED_FROM_DSP */
12
13#include "internal.h"
14#include "xmlrole.h"
15#include "ascii.h"
16
/* Doesn't check:

   - that ',' and '|' are not mixed in a model group
   - the content of literals

*/
23
24static const char KW_ANY[] = {
25    ASCII_A, ASCII_N, ASCII_Y, '\0' };
26static const char KW_ATTLIST[] = {
27    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
28static const char KW_CDATA[] = {
29    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
30static const char KW_DOCTYPE[] = {
31    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
32static const char KW_ELEMENT[] = {
33    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
34static const char KW_EMPTY[] = {
35    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
36static const char KW_ENTITIES[] = {
37    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
38    '\0' };
39static const char KW_ENTITY[] = {
40    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
41static const char KW_FIXED[] = {
42    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
43static const char KW_ID[] = {
44    ASCII_I, ASCII_D, '\0' };
45static const char KW_IDREF[] = {
46    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
47static const char KW_IDREFS[] = {
48    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
49static const char KW_IGNORE[] = {
50    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
51static const char KW_IMPLIED[] = {
52    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
53static const char KW_INCLUDE[] = {
54    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
55static const char KW_NDATA[] = {
56    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
57static const char KW_NMTOKEN[] = {
58    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
59static const char KW_NMTOKENS[] = {
60    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
61    '\0' };
62static const char KW_NOTATION[] =
63    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
64      '\0' };
65static const char KW_PCDATA[] = {
66    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
67static const char KW_PUBLIC[] = {
68    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
69static const char KW_REQUIRED[] = {
70    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
71    '\0' };
72static const char KW_SYSTEM[] = {
73    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
74
/* MIN_BYTES_PER_CHAR(enc) reads the minBytesPerChar member of the encoding
   that enc points to.  A definition supplied before this point takes
   precedence over this default. */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
78
/* setTopLevel(state) stores the top-level handler in state->handler and
   expands to that assignment expression.  Without XML_DTD the handler is
   always internalSubset; with XML_DTD it is internalSubset when
   state->documentEntity is non-zero and externalSubset1 otherwise. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler \
   = ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
87
88typedef int FASTCALL PROLOG_HANDLER(PROLOG_STATE *state,
89                                    int tok,
90                                    const char *ptr,
91                                    const char *end,
92                                    const ENCODING *enc);
93
94static PROLOG_HANDLER
95  prolog0, prolog1, prolog2,
96  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
97  internalSubset,
98  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
99  entity7, entity8, entity9, entity10,
100  notation0, notation1, notation2, notation3, notation4,
101  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
102  attlist7, attlist8, attlist9,
103  element0, element1, element2, element3, element4, element5, element6,
104  element7,
105#ifdef XML_DTD
106  externalSubset0, externalSubset1,
107  condSect0, condSect1, condSect2,
108#endif /* XML_DTD */
109  declClose,
110  error;
111
112static int FASTCALL common(PROLOG_STATE *state, int tok);
113
114static int FASTCALL
115prolog0(PROLOG_STATE *state,
116        int tok,
117        const char *ptr,
118        const char *end,
119        const ENCODING *enc)
120{
121  switch (tok) {
122  case XML_TOK_PROLOG_S:
123    state->handler = prolog1;
124    return XML_ROLE_NONE;
125  case XML_TOK_XML_DECL:
126    state->handler = prolog1;
127    return XML_ROLE_XML_DECL;
128  case XML_TOK_PI:
129    state->handler = prolog1;
130    return XML_ROLE_PI;
131  case XML_TOK_COMMENT:
132    state->handler = prolog1;
133    return XML_ROLE_COMMENT;
134  case XML_TOK_BOM:
135    return XML_ROLE_NONE;
136  case XML_TOK_DECL_OPEN:
137    if (!XmlNameMatchesAscii(enc,
138                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
139                             end,
140                             KW_DOCTYPE))
141      break;
142    state->handler = doctype0;
143    return XML_ROLE_DOCTYPE_NONE;
144  case XML_TOK_INSTANCE_START:
145    state->handler = error;
146    return XML_ROLE_INSTANCE_START;
147  }
148  return common(state, tok);
149}
150
151static int FASTCALL
152prolog1(PROLOG_STATE *state,
153        int tok,
154        const char *ptr,
155        const char *end,
156        const ENCODING *enc)
157{
158  switch (tok) {
159  case XML_TOK_PROLOG_S:
160    return XML_ROLE_NONE;
161  case XML_TOK_PI:
162    return XML_ROLE_PI;
163  case XML_TOK_COMMENT:
164    return XML_ROLE_COMMENT;
165  case XML_TOK_BOM:
166    return XML_ROLE_NONE;
167  case XML_TOK_DECL_OPEN:
168    if (!XmlNameMatchesAscii(enc,
169                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
170                             end,
171                             KW_DOCTYPE))
172      break;
173    state->handler = doctype0;
174    return XML_ROLE_DOCTYPE_NONE;
175  case XML_TOK_INSTANCE_START:
176    state->handler = error;
177    return XML_ROLE_INSTANCE_START;
178  }
179  return common(state, tok);
180}
181
182static int FASTCALL
183prolog2(PROLOG_STATE *state,
184        int tok,
185        const char *ptr,
186        const char *end,
187        const ENCODING *enc)
188{
189  switch (tok) {
190  case XML_TOK_PROLOG_S:
191    return XML_ROLE_NONE;
192  case XML_TOK_PI:
193    return XML_ROLE_PI;
194  case XML_TOK_COMMENT:
195    return XML_ROLE_COMMENT;
196  case XML_TOK_INSTANCE_START:
197    state->handler = error;
198    return XML_ROLE_INSTANCE_START;
199  }
200  return common(state, tok);
201}
202
203static int FASTCALL
204doctype0(PROLOG_STATE *state,
205         int tok,
206         const char *ptr,
207         const char *end,
208         const ENCODING *enc)
209{
210  switch (tok) {
211  case XML_TOK_PROLOG_S:
212    return XML_ROLE_DOCTYPE_NONE;
213  case XML_TOK_NAME:
214  case XML_TOK_PREFIXED_NAME:
215    state->handler = doctype1;
216    return XML_ROLE_DOCTYPE_NAME;
217  }
218  return common(state, tok);
219}
220
221static int FASTCALL
222doctype1(PROLOG_STATE *state,
223         int tok,
224         const char *ptr,
225         const char *end,
226         const ENCODING *enc)
227{
228  switch (tok) {
229  case XML_TOK_PROLOG_S:
230    return XML_ROLE_DOCTYPE_NONE;
231  case XML_TOK_OPEN_BRACKET:
232    state->handler = internalSubset;
233    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
234  case XML_TOK_DECL_CLOSE:
235    state->handler = prolog2;
236    return XML_ROLE_DOCTYPE_CLOSE;
237  case XML_TOK_NAME:
238    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
239      state->handler = doctype3;
240      return XML_ROLE_DOCTYPE_NONE;
241    }
242    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
243      state->handler = doctype2;
244      return XML_ROLE_DOCTYPE_NONE;
245    }
246    break;
247  }
248  return common(state, tok);
249}
250
251static int FASTCALL
252doctype2(PROLOG_STATE *state,
253         int tok,
254         const char *ptr,
255         const char *end,
256         const ENCODING *enc)
257{
258  switch (tok) {
259  case XML_TOK_PROLOG_S:
260    return XML_ROLE_DOCTYPE_NONE;
261  case XML_TOK_LITERAL:
262    state->handler = doctype3;
263    return XML_ROLE_DOCTYPE_PUBLIC_ID;
264  }
265  return common(state, tok);
266}
267
268static int FASTCALL
269doctype3(PROLOG_STATE *state,
270         int tok,
271         const char *ptr,
272         const char *end,
273         const ENCODING *enc)
274{
275  switch (tok) {
276  case XML_TOK_PROLOG_S:
277    return XML_ROLE_DOCTYPE_NONE;
278  case XML_TOK_LITERAL:
279    state->handler = doctype4;
280    return XML_ROLE_DOCTYPE_SYSTEM_ID;
281  }
282  return common(state, tok);
283}
284
285static int FASTCALL
286doctype4(PROLOG_STATE *state,
287         int tok,
288         const char *ptr,
289         const char *end,
290         const ENCODING *enc)
291{
292  switch (tok) {
293  case XML_TOK_PROLOG_S:
294    return XML_ROLE_DOCTYPE_NONE;
295  case XML_TOK_OPEN_BRACKET:
296    state->handler = internalSubset;
297    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
298  case XML_TOK_DECL_CLOSE:
299    state->handler = prolog2;
300    return XML_ROLE_DOCTYPE_CLOSE;
301  }
302  return common(state, tok);
303}
304
305static int FASTCALL
306doctype5(PROLOG_STATE *state,
307         int tok,
308         const char *ptr,
309         const char *end,
310         const ENCODING *enc)
311{
312  switch (tok) {
313  case XML_TOK_PROLOG_S:
314    return XML_ROLE_DOCTYPE_NONE;
315  case XML_TOK_DECL_CLOSE:
316    state->handler = prolog2;
317    return XML_ROLE_DOCTYPE_CLOSE;
318  }
319  return common(state, tok);
320}
321
322static int FASTCALL
323internalSubset(PROLOG_STATE *state,
324               int tok,
325               const char *ptr,
326               const char *end,
327               const ENCODING *enc)
328{
329  switch (tok) {
330  case XML_TOK_PROLOG_S:
331    return XML_ROLE_NONE;
332  case XML_TOK_DECL_OPEN:
333    if (XmlNameMatchesAscii(enc,
334                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
335                            end,
336                            KW_ENTITY)) {
337      state->handler = entity0;
338      return XML_ROLE_ENTITY_NONE;
339    }
340    if (XmlNameMatchesAscii(enc,
341                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
342                            end,
343                            KW_ATTLIST)) {
344      state->handler = attlist0;
345      return XML_ROLE_ATTLIST_NONE;
346    }
347    if (XmlNameMatchesAscii(enc,
348                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
349                            end,
350                            KW_ELEMENT)) {
351      state->handler = element0;
352      return XML_ROLE_ELEMENT_NONE;
353    }
354    if (XmlNameMatchesAscii(enc,
355                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
356                            end,
357                            KW_NOTATION)) {
358      state->handler = notation0;
359      return XML_ROLE_NOTATION_NONE;
360    }
361    break;
362  case XML_TOK_PI:
363    return XML_ROLE_PI;
364  case XML_TOK_COMMENT:
365    return XML_ROLE_COMMENT;
366  case XML_TOK_PARAM_ENTITY_REF:
367    return XML_ROLE_PARAM_ENTITY_REF;
368  case XML_TOK_CLOSE_BRACKET:
369    state->handler = doctype5;
370    return XML_ROLE_DOCTYPE_NONE;
371  }
372  return common(state, tok);
373}
374
375#ifdef XML_DTD
376
377static int FASTCALL
378externalSubset0(PROLOG_STATE *state,
379                int tok,
380                const char *ptr,
381                const char *end,
382                const ENCODING *enc)
383{
384  state->handler = externalSubset1;
385  if (tok == XML_TOK_XML_DECL)
386    return XML_ROLE_TEXT_DECL;
387  return externalSubset1(state, tok, ptr, end, enc);
388}
389
390static int FASTCALL
391externalSubset1(PROLOG_STATE *state,
392                int tok,
393                const char *ptr,
394                const char *end,
395                const ENCODING *enc)
396{
397  switch (tok) {
398  case XML_TOK_COND_SECT_OPEN:
399    state->handler = condSect0;
400    return XML_ROLE_NONE;
401  case XML_TOK_COND_SECT_CLOSE:
402    if (state->includeLevel == 0)
403      break;
404    state->includeLevel -= 1;
405    return XML_ROLE_NONE;
406  case XML_TOK_PROLOG_S:
407    return XML_ROLE_NONE;
408  case XML_TOK_CLOSE_BRACKET:
409    break;
410  case XML_TOK_NONE:
411    if (state->includeLevel)
412      break;
413    return XML_ROLE_NONE;
414  default:
415    return internalSubset(state, tok, ptr, end, enc);
416  }
417  return common(state, tok);
418}
419
420#endif /* XML_DTD */
421
422static int FASTCALL
423entity0(PROLOG_STATE *state,
424        int tok,
425        const char *ptr,
426        const char *end,
427        const ENCODING *enc)
428{
429  switch (tok) {
430  case XML_TOK_PROLOG_S:
431    return XML_ROLE_ENTITY_NONE;
432  case XML_TOK_PERCENT:
433    state->handler = entity1;
434    return XML_ROLE_ENTITY_NONE;
435  case XML_TOK_NAME:
436    state->handler = entity2;
437    return XML_ROLE_GENERAL_ENTITY_NAME;
438  }
439  return common(state, tok);
440}
441
442static int FASTCALL
443entity1(PROLOG_STATE *state,
444        int tok,
445        const char *ptr,
446        const char *end,
447        const ENCODING *enc)
448{
449  switch (tok) {
450  case XML_TOK_PROLOG_S:
451    return XML_ROLE_ENTITY_NONE;
452  case XML_TOK_NAME:
453    state->handler = entity7;
454    return XML_ROLE_PARAM_ENTITY_NAME;
455  }
456  return common(state, tok);
457}
458
459static int FASTCALL
460entity2(PROLOG_STATE *state,
461        int tok,
462        const char *ptr,
463        const char *end,
464        const ENCODING *enc)
465{
466  switch (tok) {
467  case XML_TOK_PROLOG_S:
468    return XML_ROLE_ENTITY_NONE;
469  case XML_TOK_NAME:
470    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
471      state->handler = entity4;
472      return XML_ROLE_ENTITY_NONE;
473    }
474    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
475      state->handler = entity3;
476      return XML_ROLE_ENTITY_NONE;
477    }
478    break;
479  case XML_TOK_LITERAL:
480    state->handler = declClose;
481    state->role_none = XML_ROLE_ENTITY_NONE;
482    return XML_ROLE_ENTITY_VALUE;
483  }
484  return common(state, tok);
485}
486
487static int FASTCALL
488entity3(PROLOG_STATE *state,
489        int tok,
490        const char *ptr,
491        const char *end,
492        const ENCODING *enc)
493{
494  switch (tok) {
495  case XML_TOK_PROLOG_S:
496    return XML_ROLE_ENTITY_NONE;
497  case XML_TOK_LITERAL:
498    state->handler = entity4;
499    return XML_ROLE_ENTITY_PUBLIC_ID;
500  }
501  return common(state, tok);
502}
503
504static int FASTCALL
505entity4(PROLOG_STATE *state,
506        int tok,
507        const char *ptr,
508        const char *end,
509        const ENCODING *enc)
510{
511  switch (tok) {
512  case XML_TOK_PROLOG_S:
513    return XML_ROLE_ENTITY_NONE;
514  case XML_TOK_LITERAL:
515    state->handler = entity5;
516    return XML_ROLE_ENTITY_SYSTEM_ID;
517  }
518  return common(state, tok);
519}
520
521static int FASTCALL
522entity5(PROLOG_STATE *state,
523        int tok,
524        const char *ptr,
525        const char *end,
526        const ENCODING *enc)
527{
528  switch (tok) {
529  case XML_TOK_PROLOG_S:
530    return XML_ROLE_ENTITY_NONE;
531  case XML_TOK_DECL_CLOSE:
532    setTopLevel(state);
533    return XML_ROLE_ENTITY_COMPLETE;
534  case XML_TOK_NAME:
535    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
536      state->handler = entity6;
537      return XML_ROLE_ENTITY_NONE;
538    }
539    break;
540  }
541  return common(state, tok);
542}
543
544static int FASTCALL
545entity6(PROLOG_STATE *state,
546        int tok,
547        const char *ptr,
548        const char *end,
549        const ENCODING *enc)
550{
551  switch (tok) {
552  case XML_TOK_PROLOG_S:
553    return XML_ROLE_ENTITY_NONE;
554  case XML_TOK_NAME:
555    state->handler = declClose;
556    state->role_none = XML_ROLE_ENTITY_NONE;
557    return XML_ROLE_ENTITY_NOTATION_NAME;
558  }
559  return common(state, tok);
560}
561
562static int FASTCALL
563entity7(PROLOG_STATE *state,
564        int tok,
565        const char *ptr,
566        const char *end,
567        const ENCODING *enc)
568{
569  switch (tok) {
570  case XML_TOK_PROLOG_S:
571    return XML_ROLE_ENTITY_NONE;
572  case XML_TOK_NAME:
573    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
574      state->handler = entity9;
575      return XML_ROLE_ENTITY_NONE;
576    }
577    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
578      state->handler = entity8;
579      return XML_ROLE_ENTITY_NONE;
580    }
581    break;
582  case XML_TOK_LITERAL:
583    state->handler = declClose;
584    state->role_none = XML_ROLE_ENTITY_NONE;
585    return XML_ROLE_ENTITY_VALUE;
586  }
587  return common(state, tok);
588}
589
590static int FASTCALL
591entity8(PROLOG_STATE *state,
592        int tok,
593        const char *ptr,
594        const char *end,
595        const ENCODING *enc)
596{
597  switch (tok) {
598  case XML_TOK_PROLOG_S:
599    return XML_ROLE_ENTITY_NONE;
600  case XML_TOK_LITERAL:
601    state->handler = entity9;
602    return XML_ROLE_ENTITY_PUBLIC_ID;
603  }
604  return common(state, tok);
605}
606
607static int FASTCALL
608entity9(PROLOG_STATE *state,
609        int tok,
610        const char *ptr,
611        const char *end,
612        const ENCODING *enc)
613{
614  switch (tok) {
615  case XML_TOK_PROLOG_S:
616    return XML_ROLE_ENTITY_NONE;
617  case XML_TOK_LITERAL:
618    state->handler = entity10;
619    return XML_ROLE_ENTITY_SYSTEM_ID;
620  }
621  return common(state, tok);
622}
623
624static int FASTCALL
625entity10(PROLOG_STATE *state,
626         int tok,
627         const char *ptr,
628         const char *end,
629         const ENCODING *enc)
630{
631  switch (tok) {
632  case XML_TOK_PROLOG_S:
633    return XML_ROLE_ENTITY_NONE;
634  case XML_TOK_DECL_CLOSE:
635    setTopLevel(state);
636    return XML_ROLE_ENTITY_COMPLETE;
637  }
638  return common(state, tok);
639}
640
641static int FASTCALL
642notation0(PROLOG_STATE *state,
643          int tok,
644          const char *ptr,
645          const char *end,
646          const ENCODING *enc)
647{
648  switch (tok) {
649  case XML_TOK_PROLOG_S:
650    return XML_ROLE_NOTATION_NONE;
651  case XML_TOK_NAME:
652    state->handler = notation1;
653    return XML_ROLE_NOTATION_NAME;
654  }
655  return common(state, tok);
656}
657
658static int FASTCALL
659notation1(PROLOG_STATE *state,
660          int tok,
661          const char *ptr,
662          const char *end,
663          const ENCODING *enc)
664{
665  switch (tok) {
666  case XML_TOK_PROLOG_S:
667    return XML_ROLE_NOTATION_NONE;
668  case XML_TOK_NAME:
669    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
670      state->handler = notation3;
671      return XML_ROLE_NOTATION_NONE;
672    }
673    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
674      state->handler = notation2;
675      return XML_ROLE_NOTATION_NONE;
676    }
677    break;
678  }
679  return common(state, tok);
680}
681
682static int FASTCALL
683notation2(PROLOG_STATE *state,
684          int tok,
685          const char *ptr,
686          const char *end,
687          const ENCODING *enc)
688{
689  switch (tok) {
690  case XML_TOK_PROLOG_S:
691    return XML_ROLE_NOTATION_NONE;
692  case XML_TOK_LITERAL:
693    state->handler = notation4;
694    return XML_ROLE_NOTATION_PUBLIC_ID;
695  }
696  return common(state, tok);
697}
698
699static int FASTCALL
700notation3(PROLOG_STATE *state,
701          int tok,
702          const char *ptr,
703          const char *end,
704          const ENCODING *enc)
705{
706  switch (tok) {
707  case XML_TOK_PROLOG_S:
708    return XML_ROLE_NOTATION_NONE;
709  case XML_TOK_LITERAL:
710    state->handler = declClose;
711    state->role_none = XML_ROLE_NOTATION_NONE;
712    return XML_ROLE_NOTATION_SYSTEM_ID;
713  }
714  return common(state, tok);
715}
716
717static int FASTCALL
718notation4(PROLOG_STATE *state,
719          int tok,
720          const char *ptr,
721          const char *end,
722          const ENCODING *enc)
723{
724  switch (tok) {
725  case XML_TOK_PROLOG_S:
726    return XML_ROLE_NOTATION_NONE;
727  case XML_TOK_LITERAL:
728    state->handler = declClose;
729    state->role_none = XML_ROLE_NOTATION_NONE;
730    return XML_ROLE_NOTATION_SYSTEM_ID;
731  case XML_TOK_DECL_CLOSE:
732    setTopLevel(state);
733    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
734  }
735  return common(state, tok);
736}
737
738static int FASTCALL
739attlist0(PROLOG_STATE *state,
740         int tok,
741         const char *ptr,
742         const char *end,
743         const ENCODING *enc)
744{
745  switch (tok) {
746  case XML_TOK_PROLOG_S:
747    return XML_ROLE_ATTLIST_NONE;
748  case XML_TOK_NAME:
749  case XML_TOK_PREFIXED_NAME:
750    state->handler = attlist1;
751    return XML_ROLE_ATTLIST_ELEMENT_NAME;
752  }
753  return common(state, tok);
754}
755
756static int FASTCALL
757attlist1(PROLOG_STATE *state,
758         int tok,
759         const char *ptr,
760         const char *end,
761         const ENCODING *enc)
762{
763  switch (tok) {
764  case XML_TOK_PROLOG_S:
765    return XML_ROLE_ATTLIST_NONE;
766  case XML_TOK_DECL_CLOSE:
767    setTopLevel(state);
768    return XML_ROLE_ATTLIST_NONE;
769  case XML_TOK_NAME:
770  case XML_TOK_PREFIXED_NAME:
771    state->handler = attlist2;
772    return XML_ROLE_ATTRIBUTE_NAME;
773  }
774  return common(state, tok);
775}
776
777static int FASTCALL
778attlist2(PROLOG_STATE *state,
779         int tok,
780         const char *ptr,
781         const char *end,
782         const ENCODING *enc)
783{
784  switch (tok) {
785  case XML_TOK_PROLOG_S:
786    return XML_ROLE_ATTLIST_NONE;
787  case XML_TOK_NAME:
788    {
789      static const char *types[] = {
790        KW_CDATA,
791        KW_ID,
792        KW_IDREF,
793        KW_IDREFS,
794        KW_ENTITY,
795        KW_ENTITIES,
796        KW_NMTOKEN,
797        KW_NMTOKENS,
798      };
799      int i;
800      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
801        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
802          state->handler = attlist8;
803          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
804        }
805    }
806    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
807      state->handler = attlist5;
808      return XML_ROLE_ATTLIST_NONE;
809    }
810    break;
811  case XML_TOK_OPEN_PAREN:
812    state->handler = attlist3;
813    return XML_ROLE_ATTLIST_NONE;
814  }
815  return common(state, tok);
816}
817
818static int FASTCALL
819attlist3(PROLOG_STATE *state,
820         int tok,
821         const char *ptr,
822         const char *end,
823         const ENCODING *enc)
824{
825  switch (tok) {
826  case XML_TOK_PROLOG_S:
827    return XML_ROLE_ATTLIST_NONE;
828  case XML_TOK_NMTOKEN:
829  case XML_TOK_NAME:
830  case XML_TOK_PREFIXED_NAME:
831    state->handler = attlist4;
832    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
833  }
834  return common(state, tok);
835}
836
837static int FASTCALL
838attlist4(PROLOG_STATE *state,
839         int tok,
840         const char *ptr,
841         const char *end,
842         const ENCODING *enc)
843{
844  switch (tok) {
845  case XML_TOK_PROLOG_S:
846    return XML_ROLE_ATTLIST_NONE;
847  case XML_TOK_CLOSE_PAREN:
848    state->handler = attlist8;
849    return XML_ROLE_ATTLIST_NONE;
850  case XML_TOK_OR:
851    state->handler = attlist3;
852    return XML_ROLE_ATTLIST_NONE;
853  }
854  return common(state, tok);
855}
856
857static int FASTCALL
858attlist5(PROLOG_STATE *state,
859         int tok,
860         const char *ptr,
861         const char *end,
862         const ENCODING *enc)
863{
864  switch (tok) {
865  case XML_TOK_PROLOG_S:
866    return XML_ROLE_ATTLIST_NONE;
867  case XML_TOK_OPEN_PAREN:
868    state->handler = attlist6;
869    return XML_ROLE_ATTLIST_NONE;
870  }
871  return common(state, tok);
872}
873
874static int FASTCALL
875attlist6(PROLOG_STATE *state,
876         int tok,
877         const char *ptr,
878         const char *end,
879         const ENCODING *enc)
880{
881  switch (tok) {
882  case XML_TOK_PROLOG_S:
883    return XML_ROLE_ATTLIST_NONE;
884  case XML_TOK_NAME:
885    state->handler = attlist7;
886    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
887  }
888  return common(state, tok);
889}
890
891static int FASTCALL
892attlist7(PROLOG_STATE *state,
893         int tok,
894         const char *ptr,
895         const char *end,
896         const ENCODING *enc)
897{
898  switch (tok) {
899  case XML_TOK_PROLOG_S:
900    return XML_ROLE_ATTLIST_NONE;
901  case XML_TOK_CLOSE_PAREN:
902    state->handler = attlist8;
903    return XML_ROLE_ATTLIST_NONE;
904  case XML_TOK_OR:
905    state->handler = attlist6;
906    return XML_ROLE_ATTLIST_NONE;
907  }
908  return common(state, tok);
909}
910
911/* default value */
912static int FASTCALL
913attlist8(PROLOG_STATE *state,
914         int tok,
915         const char *ptr,
916         const char *end,
917         const ENCODING *enc)
918{
919  switch (tok) {
920  case XML_TOK_PROLOG_S:
921    return XML_ROLE_ATTLIST_NONE;
922  case XML_TOK_POUND_NAME:
923    if (XmlNameMatchesAscii(enc,
924                            ptr + MIN_BYTES_PER_CHAR(enc),
925                            end,
926                            KW_IMPLIED)) {
927      state->handler = attlist1;
928      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
929    }
930    if (XmlNameMatchesAscii(enc,
931                            ptr + MIN_BYTES_PER_CHAR(enc),
932                            end,
933                            KW_REQUIRED)) {
934      state->handler = attlist1;
935      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
936    }
937    if (XmlNameMatchesAscii(enc,
938                            ptr + MIN_BYTES_PER_CHAR(enc),
939                            end,
940                            KW_FIXED)) {
941      state->handler = attlist9;
942      return XML_ROLE_ATTLIST_NONE;
943    }
944    break;
945  case XML_TOK_LITERAL:
946    state->handler = attlist1;
947    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
948  }
949  return common(state, tok);
950}
951
952static int FASTCALL
953attlist9(PROLOG_STATE *state,
954         int tok,
955         const char *ptr,
956         const char *end,
957         const ENCODING *enc)
958{
959  switch (tok) {
960  case XML_TOK_PROLOG_S:
961    return XML_ROLE_ATTLIST_NONE;
962  case XML_TOK_LITERAL:
963    state->handler = attlist1;
964    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
965  }
966  return common(state, tok);
967}
968
969static int FASTCALL
970element0(PROLOG_STATE *state,
971         int tok,
972         const char *ptr,
973         const char *end,
974         const ENCODING *enc)
975{
976  switch (tok) {
977  case XML_TOK_PROLOG_S:
978    return XML_ROLE_ELEMENT_NONE;
979  case XML_TOK_NAME:
980  case XML_TOK_PREFIXED_NAME:
981    state->handler = element1;
982    return XML_ROLE_ELEMENT_NAME;
983  }
984  return common(state, tok);
985}
986
987static int FASTCALL
988element1(PROLOG_STATE *state,
989         int tok,
990         const char *ptr,
991         const char *end,
992         const ENCODING *enc)
993{
994  switch (tok) {
995  case XML_TOK_PROLOG_S:
996    return XML_ROLE_ELEMENT_NONE;
997  case XML_TOK_NAME:
998    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
999      state->handler = declClose;
1000      state->role_none = XML_ROLE_ELEMENT_NONE;
1001      return XML_ROLE_CONTENT_EMPTY;
1002    }
1003    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1004      state->handler = declClose;
1005      state->role_none = XML_ROLE_ELEMENT_NONE;
1006      return XML_ROLE_CONTENT_ANY;
1007    }
1008    break;
1009  case XML_TOK_OPEN_PAREN:
1010    state->handler = element2;
1011    state->level = 1;
1012    return XML_ROLE_GROUP_OPEN;
1013  }
1014  return common(state, tok);
1015}
1016
1017static int FASTCALL
1018element2(PROLOG_STATE *state,
1019         int tok,
1020         const char *ptr,
1021         const char *end,
1022         const ENCODING *enc)
1023{
1024  switch (tok) {
1025  case XML_TOK_PROLOG_S:
1026    return XML_ROLE_ELEMENT_NONE;
1027  case XML_TOK_POUND_NAME:
1028    if (XmlNameMatchesAscii(enc,
1029                            ptr + MIN_BYTES_PER_CHAR(enc),
1030                            end,
1031                            KW_PCDATA)) {
1032      state->handler = element3;
1033      return XML_ROLE_CONTENT_PCDATA;
1034    }
1035    break;
1036  case XML_TOK_OPEN_PAREN:
1037    state->level = 2;
1038    state->handler = element6;
1039    return XML_ROLE_GROUP_OPEN;
1040  case XML_TOK_NAME:
1041  case XML_TOK_PREFIXED_NAME:
1042    state->handler = element7;
1043    return XML_ROLE_CONTENT_ELEMENT;
1044  case XML_TOK_NAME_QUESTION:
1045    state->handler = element7;
1046    return XML_ROLE_CONTENT_ELEMENT_OPT;
1047  case XML_TOK_NAME_ASTERISK:
1048    state->handler = element7;
1049    return XML_ROLE_CONTENT_ELEMENT_REP;
1050  case XML_TOK_NAME_PLUS:
1051    state->handler = element7;
1052    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1053  }
1054  return common(state, tok);
1055}
1056
1057static int FASTCALL
1058element3(PROLOG_STATE *state,
1059         int tok,
1060         const char *ptr,
1061         const char *end,
1062         const ENCODING *enc)
1063{
1064  switch (tok) {
1065  case XML_TOK_PROLOG_S:
1066    return XML_ROLE_ELEMENT_NONE;
1067  case XML_TOK_CLOSE_PAREN:
1068    state->handler = declClose;
1069    state->role_none = XML_ROLE_ELEMENT_NONE;
1070    return XML_ROLE_GROUP_CLOSE;
1071  case XML_TOK_CLOSE_PAREN_ASTERISK:
1072    state->handler = declClose;
1073    state->role_none = XML_ROLE_ELEMENT_NONE;
1074    return XML_ROLE_GROUP_CLOSE_REP;
1075  case XML_TOK_OR:
1076    state->handler = element4;
1077    return XML_ROLE_ELEMENT_NONE;
1078  }
1079  return common(state, tok);
1080}
1081
1082static int FASTCALL
1083element4(PROLOG_STATE *state,
1084         int tok,
1085         const char *ptr,
1086         const char *end,
1087         const ENCODING *enc)
1088{
1089  switch (tok) {
1090  case XML_TOK_PROLOG_S:
1091    return XML_ROLE_ELEMENT_NONE;
1092  case XML_TOK_NAME:
1093  case XML_TOK_PREFIXED_NAME:
1094    state->handler = element5;
1095    return XML_ROLE_CONTENT_ELEMENT;
1096  }
1097  return common(state, tok);
1098}
1099
1100static int FASTCALL
1101element5(PROLOG_STATE *state,
1102         int tok,
1103         const char *ptr,
1104         const char *end,
1105         const ENCODING *enc)
1106{
1107  switch (tok) {
1108  case XML_TOK_PROLOG_S:
1109    return XML_ROLE_ELEMENT_NONE;
1110  case XML_TOK_CLOSE_PAREN_ASTERISK:
1111    state->handler = declClose;
1112    state->role_none = XML_ROLE_ELEMENT_NONE;
1113    return XML_ROLE_GROUP_CLOSE_REP;
1114  case XML_TOK_OR:
1115    state->handler = element4;
1116    return XML_ROLE_ELEMENT_NONE;
1117  }
1118  return common(state, tok);
1119}
1120
1121static int FASTCALL
1122element6(PROLOG_STATE *state,
1123         int tok,
1124         const char *ptr,
1125         const char *end,
1126         const ENCODING *enc)
1127{
1128  switch (tok) {
1129  case XML_TOK_PROLOG_S:
1130    return XML_ROLE_ELEMENT_NONE;
1131  case XML_TOK_OPEN_PAREN:
1132    state->level += 1;
1133    return XML_ROLE_GROUP_OPEN;
1134  case XML_TOK_NAME:
1135  case XML_TOK_PREFIXED_NAME:
1136    state->handler = element7;
1137    return XML_ROLE_CONTENT_ELEMENT;
1138  case XML_TOK_NAME_QUESTION:
1139    state->handler = element7;
1140    return XML_ROLE_CONTENT_ELEMENT_OPT;
1141  case XML_TOK_NAME_ASTERISK:
1142    state->handler = element7;
1143    return XML_ROLE_CONTENT_ELEMENT_REP;
1144  case XML_TOK_NAME_PLUS:
1145    state->handler = element7;
1146    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1147  }
1148  return common(state, tok);
1149}
1150
1151static int FASTCALL
1152element7(PROLOG_STATE *state,
1153         int tok,
1154         const char *ptr,
1155         const char *end,
1156         const ENCODING *enc)
1157{
1158  switch (tok) {
1159  case XML_TOK_PROLOG_S:
1160    return XML_ROLE_ELEMENT_NONE;
1161  case XML_TOK_CLOSE_PAREN:
1162    state->level -= 1;
1163    if (state->level == 0) {
1164      state->handler = declClose;
1165      state->role_none = XML_ROLE_ELEMENT_NONE;
1166    }
1167    return XML_ROLE_GROUP_CLOSE;
1168  case XML_TOK_CLOSE_PAREN_ASTERISK:
1169    state->level -= 1;
1170    if (state->level == 0) {
1171      state->handler = declClose;
1172      state->role_none = XML_ROLE_ELEMENT_NONE;
1173    }
1174    return XML_ROLE_GROUP_CLOSE_REP;
1175  case XML_TOK_CLOSE_PAREN_QUESTION:
1176    state->level -= 1;
1177    if (state->level == 0) {
1178      state->handler = declClose;
1179      state->role_none = XML_ROLE_ELEMENT_NONE;
1180    }
1181    return XML_ROLE_GROUP_CLOSE_OPT;
1182  case XML_TOK_CLOSE_PAREN_PLUS:
1183    state->level -= 1;
1184    if (state->level == 0) {
1185      state->handler = declClose;
1186      state->role_none = XML_ROLE_ELEMENT_NONE;
1187    }
1188    return XML_ROLE_GROUP_CLOSE_PLUS;
1189  case XML_TOK_COMMA:
1190    state->handler = element6;
1191    return XML_ROLE_GROUP_SEQUENCE;
1192  case XML_TOK_OR:
1193    state->handler = element6;
1194    return XML_ROLE_GROUP_CHOICE;
1195  }
1196  return common(state, tok);
1197}
1198
1199#ifdef XML_DTD
1200
1201static int FASTCALL
1202condSect0(PROLOG_STATE *state,
1203          int tok,
1204          const char *ptr,
1205          const char *end,
1206          const ENCODING *enc)
1207{
1208  switch (tok) {
1209  case XML_TOK_PROLOG_S:
1210    return XML_ROLE_NONE;
1211  case XML_TOK_NAME:
1212    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1213      state->handler = condSect1;
1214      return XML_ROLE_NONE;
1215    }
1216    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1217      state->handler = condSect2;
1218      return XML_ROLE_NONE;
1219    }
1220    break;
1221  }
1222  return common(state, tok);
1223}
1224
1225static int FASTCALL
1226condSect1(PROLOG_STATE *state,
1227          int tok,
1228          const char *ptr,
1229          const char *end,
1230          const ENCODING *enc)
1231{
1232  switch (tok) {
1233  case XML_TOK_PROLOG_S:
1234    return XML_ROLE_NONE;
1235  case XML_TOK_OPEN_BRACKET:
1236    state->handler = externalSubset1;
1237    state->includeLevel += 1;
1238    return XML_ROLE_NONE;
1239  }
1240  return common(state, tok);
1241}
1242
1243static int FASTCALL
1244condSect2(PROLOG_STATE *state,
1245          int tok,
1246          const char *ptr,
1247          const char *end,
1248          const ENCODING *enc)
1249{
1250  switch (tok) {
1251  case XML_TOK_PROLOG_S:
1252    return XML_ROLE_NONE;
1253  case XML_TOK_OPEN_BRACKET:
1254    state->handler = externalSubset1;
1255    return XML_ROLE_IGNORE_SECT;
1256  }
1257  return common(state, tok);
1258}
1259
1260#endif /* XML_DTD */
1261
1262static int FASTCALL
1263declClose(PROLOG_STATE *state,
1264          int tok,
1265          const char *ptr,
1266          const char *end,
1267          const ENCODING *enc)
1268{
1269  switch (tok) {
1270  case XML_TOK_PROLOG_S:
1271    return state->role_none;
1272  case XML_TOK_DECL_CLOSE:
1273    setTopLevel(state);
1274    return state->role_none;
1275  }
1276  return common(state, tok);
1277}
1278
1279static int FASTCALL
1280error(PROLOG_STATE *state,
1281      int tok,
1282      const char *ptr,
1283      const char *end,
1284      const ENCODING *enc)
1285{
1286  return XML_ROLE_NONE;
1287}
1288
1289static int FASTCALL
1290common(PROLOG_STATE *state, int tok)
1291{
1292#ifdef XML_DTD
1293  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1294    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1295#endif
1296  state->handler = error;
1297  return XML_ROLE_ERROR;
1298}
1299
1300void
1301XmlPrologStateInit(PROLOG_STATE *state)
1302{
1303  state->handler = prolog0;
1304#ifdef XML_DTD
1305  state->documentEntity = 1;
1306  state->includeLevel = 0;
1307  state->inEntityValue = 0;
1308#endif /* XML_DTD */
1309}
1310
1311#ifdef XML_DTD
1312
1313void
1314XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1315{
1316  state->handler = externalSubset0;
1317  state->documentEntity = 0;
1318  state->includeLevel = 0;
1319}
1320
1321#endif /* XML_DTD */
1322