1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3*/
4
5#ifdef COMPILED_FROM_DSP
6#include "winconfig.h"
7#elif defined(OS2_32)
8#include "os2config.h"
9#elif defined(__MSDOS__)
10#include "dosconfig.h"
11#elif defined(MACOS_CLASSIC)
12#include "macconfig.h"
13#else
14#include "expat_config.h"
15#endif /* ndef COMPILED_FROM_DSP */
16
17#include "internal.h"
18#include "xmlrole.h"
19#include "ascii.h"
20
/* This module doesn't check:

 that ',' and '|' are not mixed in a model group
 the content of literals

*/
27
28static const char KW_ANY[] = {
29    ASCII_A, ASCII_N, ASCII_Y, '\0' };
30static const char KW_ATTLIST[] = {
31    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
32static const char KW_CDATA[] = {
33    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
34static const char KW_DOCTYPE[] = {
35    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
36static const char KW_ELEMENT[] = {
37    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
38static const char KW_EMPTY[] = {
39    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
40static const char KW_ENTITIES[] = {
41    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
42    '\0' };
43static const char KW_ENTITY[] = {
44    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
45static const char KW_FIXED[] = {
46    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
47static const char KW_ID[] = {
48    ASCII_I, ASCII_D, '\0' };
49static const char KW_IDREF[] = {
50    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
51static const char KW_IDREFS[] = {
52    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
53static const char KW_IGNORE[] = {
54    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
55static const char KW_IMPLIED[] = {
56    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
57static const char KW_INCLUDE[] = {
58    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
59static const char KW_NDATA[] = {
60    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
61static const char KW_NMTOKEN[] = {
62    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
63static const char KW_NMTOKENS[] = {
64    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
65    '\0' };
66static const char KW_NOTATION[] =
67    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
68      '\0' };
69static const char KW_PCDATA[] = {
70    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
71static const char KW_PUBLIC[] = {
72    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
73static const char KW_REQUIRED[] = {
74    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
75    '\0' };
76static const char KW_SYSTEM[] = {
77    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
78
/* A build may pre-define MIN_BYTES_PER_CHAR; when it does not, the value is
   read from the encoding's minBytesPerChar field. */
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif /* !defined(MIN_BYTES_PER_CHAR) */
82
/* Install the handler used between declarations.  Without XML_DTD this is
   always internalSubset; with XML_DTD it is internalSubset when
   state->documentEntity is non-zero and externalSubset1 otherwise. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = (!(state)->documentEntity \
                       ? externalSubset1 \
                       : internalSubset))
#endif /* XML_DTD */
91
92typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
93                                   int tok,
94                                   const char *ptr,
95                                   const char *end,
96                                   const ENCODING *enc);
97
98static PROLOG_HANDLER
99  prolog0, prolog1, prolog2,
100  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
101  internalSubset,
102  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
103  entity7, entity8, entity9, entity10,
104  notation0, notation1, notation2, notation3, notation4,
105  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
106  attlist7, attlist8, attlist9,
107  element0, element1, element2, element3, element4, element5, element6,
108  element7,
109#ifdef XML_DTD
110  externalSubset0, externalSubset1,
111  condSect0, condSect1, condSect2,
112#endif /* XML_DTD */
113  declClose,
114  error;
115
116static int FASTCALL common(PROLOG_STATE *state, int tok);
117
118static int PTRCALL
119prolog0(PROLOG_STATE *state,
120        int tok,
121        const char *ptr,
122        const char *end,
123        const ENCODING *enc)
124{
125  switch (tok) {
126  case XML_TOK_PROLOG_S:
127    state->handler = prolog1;
128    return XML_ROLE_NONE;
129  case XML_TOK_XML_DECL:
130    state->handler = prolog1;
131    return XML_ROLE_XML_DECL;
132  case XML_TOK_PI:
133    state->handler = prolog1;
134    return XML_ROLE_PI;
135  case XML_TOK_COMMENT:
136    state->handler = prolog1;
137    return XML_ROLE_COMMENT;
138  case XML_TOK_BOM:
139    return XML_ROLE_NONE;
140  case XML_TOK_DECL_OPEN:
141    if (!XmlNameMatchesAscii(enc,
142                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
143                             end,
144                             KW_DOCTYPE))
145      break;
146    state->handler = doctype0;
147    return XML_ROLE_DOCTYPE_NONE;
148  case XML_TOK_INSTANCE_START:
149    state->handler = error;
150    return XML_ROLE_INSTANCE_START;
151  }
152  return common(state, tok);
153}
154
155static int PTRCALL
156prolog1(PROLOG_STATE *state,
157        int tok,
158        const char *ptr,
159        const char *end,
160        const ENCODING *enc)
161{
162  switch (tok) {
163  case XML_TOK_PROLOG_S:
164    return XML_ROLE_NONE;
165  case XML_TOK_PI:
166    return XML_ROLE_PI;
167  case XML_TOK_COMMENT:
168    return XML_ROLE_COMMENT;
169  case XML_TOK_BOM:
170    return XML_ROLE_NONE;
171  case XML_TOK_DECL_OPEN:
172    if (!XmlNameMatchesAscii(enc,
173                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
174                             end,
175                             KW_DOCTYPE))
176      break;
177    state->handler = doctype0;
178    return XML_ROLE_DOCTYPE_NONE;
179  case XML_TOK_INSTANCE_START:
180    state->handler = error;
181    return XML_ROLE_INSTANCE_START;
182  }
183  return common(state, tok);
184}
185
186static int PTRCALL
187prolog2(PROLOG_STATE *state,
188        int tok,
189        const char *ptr,
190        const char *end,
191        const ENCODING *enc)
192{
193  switch (tok) {
194  case XML_TOK_PROLOG_S:
195    return XML_ROLE_NONE;
196  case XML_TOK_PI:
197    return XML_ROLE_PI;
198  case XML_TOK_COMMENT:
199    return XML_ROLE_COMMENT;
200  case XML_TOK_INSTANCE_START:
201    state->handler = error;
202    return XML_ROLE_INSTANCE_START;
203  }
204  return common(state, tok);
205}
206
207static int PTRCALL
208doctype0(PROLOG_STATE *state,
209         int tok,
210         const char *ptr,
211         const char *end,
212         const ENCODING *enc)
213{
214  switch (tok) {
215  case XML_TOK_PROLOG_S:
216    return XML_ROLE_DOCTYPE_NONE;
217  case XML_TOK_NAME:
218  case XML_TOK_PREFIXED_NAME:
219    state->handler = doctype1;
220    return XML_ROLE_DOCTYPE_NAME;
221  }
222  return common(state, tok);
223}
224
225static int PTRCALL
226doctype1(PROLOG_STATE *state,
227         int tok,
228         const char *ptr,
229         const char *end,
230         const ENCODING *enc)
231{
232  switch (tok) {
233  case XML_TOK_PROLOG_S:
234    return XML_ROLE_DOCTYPE_NONE;
235  case XML_TOK_OPEN_BRACKET:
236    state->handler = internalSubset;
237    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
238  case XML_TOK_DECL_CLOSE:
239    state->handler = prolog2;
240    return XML_ROLE_DOCTYPE_CLOSE;
241  case XML_TOK_NAME:
242    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
243      state->handler = doctype3;
244      return XML_ROLE_DOCTYPE_NONE;
245    }
246    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
247      state->handler = doctype2;
248      return XML_ROLE_DOCTYPE_NONE;
249    }
250    break;
251  }
252  return common(state, tok);
253}
254
255static int PTRCALL
256doctype2(PROLOG_STATE *state,
257         int tok,
258         const char *ptr,
259         const char *end,
260         const ENCODING *enc)
261{
262  switch (tok) {
263  case XML_TOK_PROLOG_S:
264    return XML_ROLE_DOCTYPE_NONE;
265  case XML_TOK_LITERAL:
266    state->handler = doctype3;
267    return XML_ROLE_DOCTYPE_PUBLIC_ID;
268  }
269  return common(state, tok);
270}
271
272static int PTRCALL
273doctype3(PROLOG_STATE *state,
274         int tok,
275         const char *ptr,
276         const char *end,
277         const ENCODING *enc)
278{
279  switch (tok) {
280  case XML_TOK_PROLOG_S:
281    return XML_ROLE_DOCTYPE_NONE;
282  case XML_TOK_LITERAL:
283    state->handler = doctype4;
284    return XML_ROLE_DOCTYPE_SYSTEM_ID;
285  }
286  return common(state, tok);
287}
288
289static int PTRCALL
290doctype4(PROLOG_STATE *state,
291         int tok,
292         const char *ptr,
293         const char *end,
294         const ENCODING *enc)
295{
296  switch (tok) {
297  case XML_TOK_PROLOG_S:
298    return XML_ROLE_DOCTYPE_NONE;
299  case XML_TOK_OPEN_BRACKET:
300    state->handler = internalSubset;
301    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
302  case XML_TOK_DECL_CLOSE:
303    state->handler = prolog2;
304    return XML_ROLE_DOCTYPE_CLOSE;
305  }
306  return common(state, tok);
307}
308
309static int PTRCALL
310doctype5(PROLOG_STATE *state,
311         int tok,
312         const char *ptr,
313         const char *end,
314         const ENCODING *enc)
315{
316  switch (tok) {
317  case XML_TOK_PROLOG_S:
318    return XML_ROLE_DOCTYPE_NONE;
319  case XML_TOK_DECL_CLOSE:
320    state->handler = prolog2;
321    return XML_ROLE_DOCTYPE_CLOSE;
322  }
323  return common(state, tok);
324}
325
326static int PTRCALL
327internalSubset(PROLOG_STATE *state,
328               int tok,
329               const char *ptr,
330               const char *end,
331               const ENCODING *enc)
332{
333  switch (tok) {
334  case XML_TOK_PROLOG_S:
335    return XML_ROLE_NONE;
336  case XML_TOK_DECL_OPEN:
337    if (XmlNameMatchesAscii(enc,
338                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
339                            end,
340                            KW_ENTITY)) {
341      state->handler = entity0;
342      return XML_ROLE_ENTITY_NONE;
343    }
344    if (XmlNameMatchesAscii(enc,
345                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
346                            end,
347                            KW_ATTLIST)) {
348      state->handler = attlist0;
349      return XML_ROLE_ATTLIST_NONE;
350    }
351    if (XmlNameMatchesAscii(enc,
352                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
353                            end,
354                            KW_ELEMENT)) {
355      state->handler = element0;
356      return XML_ROLE_ELEMENT_NONE;
357    }
358    if (XmlNameMatchesAscii(enc,
359                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
360                            end,
361                            KW_NOTATION)) {
362      state->handler = notation0;
363      return XML_ROLE_NOTATION_NONE;
364    }
365    break;
366  case XML_TOK_PI:
367    return XML_ROLE_PI;
368  case XML_TOK_COMMENT:
369    return XML_ROLE_COMMENT;
370  case XML_TOK_PARAM_ENTITY_REF:
371    return XML_ROLE_PARAM_ENTITY_REF;
372  case XML_TOK_CLOSE_BRACKET:
373    state->handler = doctype5;
374    return XML_ROLE_DOCTYPE_NONE;
375  }
376  return common(state, tok);
377}
378
379#ifdef XML_DTD
380
381static int PTRCALL
382externalSubset0(PROLOG_STATE *state,
383                int tok,
384                const char *ptr,
385                const char *end,
386                const ENCODING *enc)
387{
388  state->handler = externalSubset1;
389  if (tok == XML_TOK_XML_DECL)
390    return XML_ROLE_TEXT_DECL;
391  return externalSubset1(state, tok, ptr, end, enc);
392}
393
394static int PTRCALL
395externalSubset1(PROLOG_STATE *state,
396                int tok,
397                const char *ptr,
398                const char *end,
399                const ENCODING *enc)
400{
401  switch (tok) {
402  case XML_TOK_COND_SECT_OPEN:
403    state->handler = condSect0;
404    return XML_ROLE_NONE;
405  case XML_TOK_COND_SECT_CLOSE:
406    if (state->includeLevel == 0)
407      break;
408    state->includeLevel -= 1;
409    return XML_ROLE_NONE;
410  case XML_TOK_PROLOG_S:
411    return XML_ROLE_NONE;
412  case XML_TOK_CLOSE_BRACKET:
413    break;
414  case XML_TOK_NONE:
415    if (state->includeLevel)
416      break;
417    return XML_ROLE_NONE;
418  default:
419    return internalSubset(state, tok, ptr, end, enc);
420  }
421  return common(state, tok);
422}
423
424#endif /* XML_DTD */
425
426static int PTRCALL
427entity0(PROLOG_STATE *state,
428        int tok,
429        const char *ptr,
430        const char *end,
431        const ENCODING *enc)
432{
433  switch (tok) {
434  case XML_TOK_PROLOG_S:
435    return XML_ROLE_ENTITY_NONE;
436  case XML_TOK_PERCENT:
437    state->handler = entity1;
438    return XML_ROLE_ENTITY_NONE;
439  case XML_TOK_NAME:
440    state->handler = entity2;
441    return XML_ROLE_GENERAL_ENTITY_NAME;
442  }
443  return common(state, tok);
444}
445
446static int PTRCALL
447entity1(PROLOG_STATE *state,
448        int tok,
449        const char *ptr,
450        const char *end,
451        const ENCODING *enc)
452{
453  switch (tok) {
454  case XML_TOK_PROLOG_S:
455    return XML_ROLE_ENTITY_NONE;
456  case XML_TOK_NAME:
457    state->handler = entity7;
458    return XML_ROLE_PARAM_ENTITY_NAME;
459  }
460  return common(state, tok);
461}
462
463static int PTRCALL
464entity2(PROLOG_STATE *state,
465        int tok,
466        const char *ptr,
467        const char *end,
468        const ENCODING *enc)
469{
470  switch (tok) {
471  case XML_TOK_PROLOG_S:
472    return XML_ROLE_ENTITY_NONE;
473  case XML_TOK_NAME:
474    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
475      state->handler = entity4;
476      return XML_ROLE_ENTITY_NONE;
477    }
478    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
479      state->handler = entity3;
480      return XML_ROLE_ENTITY_NONE;
481    }
482    break;
483  case XML_TOK_LITERAL:
484    state->handler = declClose;
485    state->role_none = XML_ROLE_ENTITY_NONE;
486    return XML_ROLE_ENTITY_VALUE;
487  }
488  return common(state, tok);
489}
490
491static int PTRCALL
492entity3(PROLOG_STATE *state,
493        int tok,
494        const char *ptr,
495        const char *end,
496        const ENCODING *enc)
497{
498  switch (tok) {
499  case XML_TOK_PROLOG_S:
500    return XML_ROLE_ENTITY_NONE;
501  case XML_TOK_LITERAL:
502    state->handler = entity4;
503    return XML_ROLE_ENTITY_PUBLIC_ID;
504  }
505  return common(state, tok);
506}
507
508static int PTRCALL
509entity4(PROLOG_STATE *state,
510        int tok,
511        const char *ptr,
512        const char *end,
513        const ENCODING *enc)
514{
515  switch (tok) {
516  case XML_TOK_PROLOG_S:
517    return XML_ROLE_ENTITY_NONE;
518  case XML_TOK_LITERAL:
519    state->handler = entity5;
520    return XML_ROLE_ENTITY_SYSTEM_ID;
521  }
522  return common(state, tok);
523}
524
525static int PTRCALL
526entity5(PROLOG_STATE *state,
527        int tok,
528        const char *ptr,
529        const char *end,
530        const ENCODING *enc)
531{
532  switch (tok) {
533  case XML_TOK_PROLOG_S:
534    return XML_ROLE_ENTITY_NONE;
535  case XML_TOK_DECL_CLOSE:
536    setTopLevel(state);
537    return XML_ROLE_ENTITY_COMPLETE;
538  case XML_TOK_NAME:
539    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
540      state->handler = entity6;
541      return XML_ROLE_ENTITY_NONE;
542    }
543    break;
544  }
545  return common(state, tok);
546}
547
548static int PTRCALL
549entity6(PROLOG_STATE *state,
550        int tok,
551        const char *ptr,
552        const char *end,
553        const ENCODING *enc)
554{
555  switch (tok) {
556  case XML_TOK_PROLOG_S:
557    return XML_ROLE_ENTITY_NONE;
558  case XML_TOK_NAME:
559    state->handler = declClose;
560    state->role_none = XML_ROLE_ENTITY_NONE;
561    return XML_ROLE_ENTITY_NOTATION_NAME;
562  }
563  return common(state, tok);
564}
565
566static int PTRCALL
567entity7(PROLOG_STATE *state,
568        int tok,
569        const char *ptr,
570        const char *end,
571        const ENCODING *enc)
572{
573  switch (tok) {
574  case XML_TOK_PROLOG_S:
575    return XML_ROLE_ENTITY_NONE;
576  case XML_TOK_NAME:
577    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
578      state->handler = entity9;
579      return XML_ROLE_ENTITY_NONE;
580    }
581    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
582      state->handler = entity8;
583      return XML_ROLE_ENTITY_NONE;
584    }
585    break;
586  case XML_TOK_LITERAL:
587    state->handler = declClose;
588    state->role_none = XML_ROLE_ENTITY_NONE;
589    return XML_ROLE_ENTITY_VALUE;
590  }
591  return common(state, tok);
592}
593
594static int PTRCALL
595entity8(PROLOG_STATE *state,
596        int tok,
597        const char *ptr,
598        const char *end,
599        const ENCODING *enc)
600{
601  switch (tok) {
602  case XML_TOK_PROLOG_S:
603    return XML_ROLE_ENTITY_NONE;
604  case XML_TOK_LITERAL:
605    state->handler = entity9;
606    return XML_ROLE_ENTITY_PUBLIC_ID;
607  }
608  return common(state, tok);
609}
610
611static int PTRCALL
612entity9(PROLOG_STATE *state,
613        int tok,
614        const char *ptr,
615        const char *end,
616        const ENCODING *enc)
617{
618  switch (tok) {
619  case XML_TOK_PROLOG_S:
620    return XML_ROLE_ENTITY_NONE;
621  case XML_TOK_LITERAL:
622    state->handler = entity10;
623    return XML_ROLE_ENTITY_SYSTEM_ID;
624  }
625  return common(state, tok);
626}
627
628static int PTRCALL
629entity10(PROLOG_STATE *state,
630         int tok,
631         const char *ptr,
632         const char *end,
633         const ENCODING *enc)
634{
635  switch (tok) {
636  case XML_TOK_PROLOG_S:
637    return XML_ROLE_ENTITY_NONE;
638  case XML_TOK_DECL_CLOSE:
639    setTopLevel(state);
640    return XML_ROLE_ENTITY_COMPLETE;
641  }
642  return common(state, tok);
643}
644
645static int PTRCALL
646notation0(PROLOG_STATE *state,
647          int tok,
648          const char *ptr,
649          const char *end,
650          const ENCODING *enc)
651{
652  switch (tok) {
653  case XML_TOK_PROLOG_S:
654    return XML_ROLE_NOTATION_NONE;
655  case XML_TOK_NAME:
656    state->handler = notation1;
657    return XML_ROLE_NOTATION_NAME;
658  }
659  return common(state, tok);
660}
661
662static int PTRCALL
663notation1(PROLOG_STATE *state,
664          int tok,
665          const char *ptr,
666          const char *end,
667          const ENCODING *enc)
668{
669  switch (tok) {
670  case XML_TOK_PROLOG_S:
671    return XML_ROLE_NOTATION_NONE;
672  case XML_TOK_NAME:
673    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
674      state->handler = notation3;
675      return XML_ROLE_NOTATION_NONE;
676    }
677    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
678      state->handler = notation2;
679      return XML_ROLE_NOTATION_NONE;
680    }
681    break;
682  }
683  return common(state, tok);
684}
685
686static int PTRCALL
687notation2(PROLOG_STATE *state,
688          int tok,
689          const char *ptr,
690          const char *end,
691          const ENCODING *enc)
692{
693  switch (tok) {
694  case XML_TOK_PROLOG_S:
695    return XML_ROLE_NOTATION_NONE;
696  case XML_TOK_LITERAL:
697    state->handler = notation4;
698    return XML_ROLE_NOTATION_PUBLIC_ID;
699  }
700  return common(state, tok);
701}
702
703static int PTRCALL
704notation3(PROLOG_STATE *state,
705          int tok,
706          const char *ptr,
707          const char *end,
708          const ENCODING *enc)
709{
710  switch (tok) {
711  case XML_TOK_PROLOG_S:
712    return XML_ROLE_NOTATION_NONE;
713  case XML_TOK_LITERAL:
714    state->handler = declClose;
715    state->role_none = XML_ROLE_NOTATION_NONE;
716    return XML_ROLE_NOTATION_SYSTEM_ID;
717  }
718  return common(state, tok);
719}
720
721static int PTRCALL
722notation4(PROLOG_STATE *state,
723          int tok,
724          const char *ptr,
725          const char *end,
726          const ENCODING *enc)
727{
728  switch (tok) {
729  case XML_TOK_PROLOG_S:
730    return XML_ROLE_NOTATION_NONE;
731  case XML_TOK_LITERAL:
732    state->handler = declClose;
733    state->role_none = XML_ROLE_NOTATION_NONE;
734    return XML_ROLE_NOTATION_SYSTEM_ID;
735  case XML_TOK_DECL_CLOSE:
736    setTopLevel(state);
737    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
738  }
739  return common(state, tok);
740}
741
742static int PTRCALL
743attlist0(PROLOG_STATE *state,
744         int tok,
745         const char *ptr,
746         const char *end,
747         const ENCODING *enc)
748{
749  switch (tok) {
750  case XML_TOK_PROLOG_S:
751    return XML_ROLE_ATTLIST_NONE;
752  case XML_TOK_NAME:
753  case XML_TOK_PREFIXED_NAME:
754    state->handler = attlist1;
755    return XML_ROLE_ATTLIST_ELEMENT_NAME;
756  }
757  return common(state, tok);
758}
759
760static int PTRCALL
761attlist1(PROLOG_STATE *state,
762         int tok,
763         const char *ptr,
764         const char *end,
765         const ENCODING *enc)
766{
767  switch (tok) {
768  case XML_TOK_PROLOG_S:
769    return XML_ROLE_ATTLIST_NONE;
770  case XML_TOK_DECL_CLOSE:
771    setTopLevel(state);
772    return XML_ROLE_ATTLIST_NONE;
773  case XML_TOK_NAME:
774  case XML_TOK_PREFIXED_NAME:
775    state->handler = attlist2;
776    return XML_ROLE_ATTRIBUTE_NAME;
777  }
778  return common(state, tok);
779}
780
781static int PTRCALL
782attlist2(PROLOG_STATE *state,
783         int tok,
784         const char *ptr,
785         const char *end,
786         const ENCODING *enc)
787{
788  switch (tok) {
789  case XML_TOK_PROLOG_S:
790    return XML_ROLE_ATTLIST_NONE;
791  case XML_TOK_NAME:
792    {
793      static const char *types[] = {
794        KW_CDATA,
795        KW_ID,
796        KW_IDREF,
797        KW_IDREFS,
798        KW_ENTITY,
799        KW_ENTITIES,
800        KW_NMTOKEN,
801        KW_NMTOKENS,
802      };
803      int i;
804      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
805        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
806          state->handler = attlist8;
807          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
808        }
809    }
810    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
811      state->handler = attlist5;
812      return XML_ROLE_ATTLIST_NONE;
813    }
814    break;
815  case XML_TOK_OPEN_PAREN:
816    state->handler = attlist3;
817    return XML_ROLE_ATTLIST_NONE;
818  }
819  return common(state, tok);
820}
821
822static int PTRCALL
823attlist3(PROLOG_STATE *state,
824         int tok,
825         const char *ptr,
826         const char *end,
827         const ENCODING *enc)
828{
829  switch (tok) {
830  case XML_TOK_PROLOG_S:
831    return XML_ROLE_ATTLIST_NONE;
832  case XML_TOK_NMTOKEN:
833  case XML_TOK_NAME:
834  case XML_TOK_PREFIXED_NAME:
835    state->handler = attlist4;
836    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
837  }
838  return common(state, tok);
839}
840
841static int PTRCALL
842attlist4(PROLOG_STATE *state,
843         int tok,
844         const char *ptr,
845         const char *end,
846         const ENCODING *enc)
847{
848  switch (tok) {
849  case XML_TOK_PROLOG_S:
850    return XML_ROLE_ATTLIST_NONE;
851  case XML_TOK_CLOSE_PAREN:
852    state->handler = attlist8;
853    return XML_ROLE_ATTLIST_NONE;
854  case XML_TOK_OR:
855    state->handler = attlist3;
856    return XML_ROLE_ATTLIST_NONE;
857  }
858  return common(state, tok);
859}
860
861static int PTRCALL
862attlist5(PROLOG_STATE *state,
863         int tok,
864         const char *ptr,
865         const char *end,
866         const ENCODING *enc)
867{
868  switch (tok) {
869  case XML_TOK_PROLOG_S:
870    return XML_ROLE_ATTLIST_NONE;
871  case XML_TOK_OPEN_PAREN:
872    state->handler = attlist6;
873    return XML_ROLE_ATTLIST_NONE;
874  }
875  return common(state, tok);
876}
877
878static int PTRCALL
879attlist6(PROLOG_STATE *state,
880         int tok,
881         const char *ptr,
882         const char *end,
883         const ENCODING *enc)
884{
885  switch (tok) {
886  case XML_TOK_PROLOG_S:
887    return XML_ROLE_ATTLIST_NONE;
888  case XML_TOK_NAME:
889    state->handler = attlist7;
890    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
891  }
892  return common(state, tok);
893}
894
895static int PTRCALL
896attlist7(PROLOG_STATE *state,
897         int tok,
898         const char *ptr,
899         const char *end,
900         const ENCODING *enc)
901{
902  switch (tok) {
903  case XML_TOK_PROLOG_S:
904    return XML_ROLE_ATTLIST_NONE;
905  case XML_TOK_CLOSE_PAREN:
906    state->handler = attlist8;
907    return XML_ROLE_ATTLIST_NONE;
908  case XML_TOK_OR:
909    state->handler = attlist6;
910    return XML_ROLE_ATTLIST_NONE;
911  }
912  return common(state, tok);
913}
914
915/* default value */
916static int PTRCALL
917attlist8(PROLOG_STATE *state,
918         int tok,
919         const char *ptr,
920         const char *end,
921         const ENCODING *enc)
922{
923  switch (tok) {
924  case XML_TOK_PROLOG_S:
925    return XML_ROLE_ATTLIST_NONE;
926  case XML_TOK_POUND_NAME:
927    if (XmlNameMatchesAscii(enc,
928                            ptr + MIN_BYTES_PER_CHAR(enc),
929                            end,
930                            KW_IMPLIED)) {
931      state->handler = attlist1;
932      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
933    }
934    if (XmlNameMatchesAscii(enc,
935                            ptr + MIN_BYTES_PER_CHAR(enc),
936                            end,
937                            KW_REQUIRED)) {
938      state->handler = attlist1;
939      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
940    }
941    if (XmlNameMatchesAscii(enc,
942                            ptr + MIN_BYTES_PER_CHAR(enc),
943                            end,
944                            KW_FIXED)) {
945      state->handler = attlist9;
946      return XML_ROLE_ATTLIST_NONE;
947    }
948    break;
949  case XML_TOK_LITERAL:
950    state->handler = attlist1;
951    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
952  }
953  return common(state, tok);
954}
955
956static int PTRCALL
957attlist9(PROLOG_STATE *state,
958         int tok,
959         const char *ptr,
960         const char *end,
961         const ENCODING *enc)
962{
963  switch (tok) {
964  case XML_TOK_PROLOG_S:
965    return XML_ROLE_ATTLIST_NONE;
966  case XML_TOK_LITERAL:
967    state->handler = attlist1;
968    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
969  }
970  return common(state, tok);
971}
972
973static int PTRCALL
974element0(PROLOG_STATE *state,
975         int tok,
976         const char *ptr,
977         const char *end,
978         const ENCODING *enc)
979{
980  switch (tok) {
981  case XML_TOK_PROLOG_S:
982    return XML_ROLE_ELEMENT_NONE;
983  case XML_TOK_NAME:
984  case XML_TOK_PREFIXED_NAME:
985    state->handler = element1;
986    return XML_ROLE_ELEMENT_NAME;
987  }
988  return common(state, tok);
989}
990
991static int PTRCALL
992element1(PROLOG_STATE *state,
993         int tok,
994         const char *ptr,
995         const char *end,
996         const ENCODING *enc)
997{
998  switch (tok) {
999  case XML_TOK_PROLOG_S:
1000    return XML_ROLE_ELEMENT_NONE;
1001  case XML_TOK_NAME:
1002    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1003      state->handler = declClose;
1004      state->role_none = XML_ROLE_ELEMENT_NONE;
1005      return XML_ROLE_CONTENT_EMPTY;
1006    }
1007    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1008      state->handler = declClose;
1009      state->role_none = XML_ROLE_ELEMENT_NONE;
1010      return XML_ROLE_CONTENT_ANY;
1011    }
1012    break;
1013  case XML_TOK_OPEN_PAREN:
1014    state->handler = element2;
1015    state->level = 1;
1016    return XML_ROLE_GROUP_OPEN;
1017  }
1018  return common(state, tok);
1019}
1020
1021static int PTRCALL
1022element2(PROLOG_STATE *state,
1023         int tok,
1024         const char *ptr,
1025         const char *end,
1026         const ENCODING *enc)
1027{
1028  switch (tok) {
1029  case XML_TOK_PROLOG_S:
1030    return XML_ROLE_ELEMENT_NONE;
1031  case XML_TOK_POUND_NAME:
1032    if (XmlNameMatchesAscii(enc,
1033                            ptr + MIN_BYTES_PER_CHAR(enc),
1034                            end,
1035                            KW_PCDATA)) {
1036      state->handler = element3;
1037      return XML_ROLE_CONTENT_PCDATA;
1038    }
1039    break;
1040  case XML_TOK_OPEN_PAREN:
1041    state->level = 2;
1042    state->handler = element6;
1043    return XML_ROLE_GROUP_OPEN;
1044  case XML_TOK_NAME:
1045  case XML_TOK_PREFIXED_NAME:
1046    state->handler = element7;
1047    return XML_ROLE_CONTENT_ELEMENT;
1048  case XML_TOK_NAME_QUESTION:
1049    state->handler = element7;
1050    return XML_ROLE_CONTENT_ELEMENT_OPT;
1051  case XML_TOK_NAME_ASTERISK:
1052    state->handler = element7;
1053    return XML_ROLE_CONTENT_ELEMENT_REP;
1054  case XML_TOK_NAME_PLUS:
1055    state->handler = element7;
1056    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1057  }
1058  return common(state, tok);
1059}
1060
1061static int PTRCALL
1062element3(PROLOG_STATE *state,
1063         int tok,
1064         const char *ptr,
1065         const char *end,
1066         const ENCODING *enc)
1067{
1068  switch (tok) {
1069  case XML_TOK_PROLOG_S:
1070    return XML_ROLE_ELEMENT_NONE;
1071  case XML_TOK_CLOSE_PAREN:
1072    state->handler = declClose;
1073    state->role_none = XML_ROLE_ELEMENT_NONE;
1074    return XML_ROLE_GROUP_CLOSE;
1075  case XML_TOK_CLOSE_PAREN_ASTERISK:
1076    state->handler = declClose;
1077    state->role_none = XML_ROLE_ELEMENT_NONE;
1078    return XML_ROLE_GROUP_CLOSE_REP;
1079  case XML_TOK_OR:
1080    state->handler = element4;
1081    return XML_ROLE_ELEMENT_NONE;
1082  }
1083  return common(state, tok);
1084}
1085
1086static int PTRCALL
1087element4(PROLOG_STATE *state,
1088         int tok,
1089         const char *ptr,
1090         const char *end,
1091         const ENCODING *enc)
1092{
1093  switch (tok) {
1094  case XML_TOK_PROLOG_S:
1095    return XML_ROLE_ELEMENT_NONE;
1096  case XML_TOK_NAME:
1097  case XML_TOK_PREFIXED_NAME:
1098    state->handler = element5;
1099    return XML_ROLE_CONTENT_ELEMENT;
1100  }
1101  return common(state, tok);
1102}
1103
1104static int PTRCALL
1105element5(PROLOG_STATE *state,
1106         int tok,
1107         const char *ptr,
1108         const char *end,
1109         const ENCODING *enc)
1110{
1111  switch (tok) {
1112  case XML_TOK_PROLOG_S:
1113    return XML_ROLE_ELEMENT_NONE;
1114  case XML_TOK_CLOSE_PAREN_ASTERISK:
1115    state->handler = declClose;
1116    state->role_none = XML_ROLE_ELEMENT_NONE;
1117    return XML_ROLE_GROUP_CLOSE_REP;
1118  case XML_TOK_OR:
1119    state->handler = element4;
1120    return XML_ROLE_ELEMENT_NONE;
1121  }
1122  return common(state, tok);
1123}
1124
1125static int PTRCALL
1126element6(PROLOG_STATE *state,
1127         int tok,
1128         const char *ptr,
1129         const char *end,
1130         const ENCODING *enc)
1131{
1132  switch (tok) {
1133  case XML_TOK_PROLOG_S:
1134    return XML_ROLE_ELEMENT_NONE;
1135  case XML_TOK_OPEN_PAREN:
1136    state->level += 1;
1137    return XML_ROLE_GROUP_OPEN;
1138  case XML_TOK_NAME:
1139  case XML_TOK_PREFIXED_NAME:
1140    state->handler = element7;
1141    return XML_ROLE_CONTENT_ELEMENT;
1142  case XML_TOK_NAME_QUESTION:
1143    state->handler = element7;
1144    return XML_ROLE_CONTENT_ELEMENT_OPT;
1145  case XML_TOK_NAME_ASTERISK:
1146    state->handler = element7;
1147    return XML_ROLE_CONTENT_ELEMENT_REP;
1148  case XML_TOK_NAME_PLUS:
1149    state->handler = element7;
1150    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1151  }
1152  return common(state, tok);
1153}
1154
1155static int PTRCALL
1156element7(PROLOG_STATE *state,
1157         int tok,
1158         const char *ptr,
1159         const char *end,
1160         const ENCODING *enc)
1161{
1162  switch (tok) {
1163  case XML_TOK_PROLOG_S:
1164    return XML_ROLE_ELEMENT_NONE;
1165  case XML_TOK_CLOSE_PAREN:
1166    state->level -= 1;
1167    if (state->level == 0) {
1168      state->handler = declClose;
1169      state->role_none = XML_ROLE_ELEMENT_NONE;
1170    }
1171    return XML_ROLE_GROUP_CLOSE;
1172  case XML_TOK_CLOSE_PAREN_ASTERISK:
1173    state->level -= 1;
1174    if (state->level == 0) {
1175      state->handler = declClose;
1176      state->role_none = XML_ROLE_ELEMENT_NONE;
1177    }
1178    return XML_ROLE_GROUP_CLOSE_REP;
1179  case XML_TOK_CLOSE_PAREN_QUESTION:
1180    state->level -= 1;
1181    if (state->level == 0) {
1182      state->handler = declClose;
1183      state->role_none = XML_ROLE_ELEMENT_NONE;
1184    }
1185    return XML_ROLE_GROUP_CLOSE_OPT;
1186  case XML_TOK_CLOSE_PAREN_PLUS:
1187    state->level -= 1;
1188    if (state->level == 0) {
1189      state->handler = declClose;
1190      state->role_none = XML_ROLE_ELEMENT_NONE;
1191    }
1192    return XML_ROLE_GROUP_CLOSE_PLUS;
1193  case XML_TOK_COMMA:
1194    state->handler = element6;
1195    return XML_ROLE_GROUP_SEQUENCE;
1196  case XML_TOK_OR:
1197    state->handler = element6;
1198    return XML_ROLE_GROUP_CHOICE;
1199  }
1200  return common(state, tok);
1201}
1202
1203#ifdef XML_DTD
1204
1205static int PTRCALL
1206condSect0(PROLOG_STATE *state,
1207          int tok,
1208          const char *ptr,
1209          const char *end,
1210          const ENCODING *enc)
1211{
1212  switch (tok) {
1213  case XML_TOK_PROLOG_S:
1214    return XML_ROLE_NONE;
1215  case XML_TOK_NAME:
1216    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1217      state->handler = condSect1;
1218      return XML_ROLE_NONE;
1219    }
1220    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1221      state->handler = condSect2;
1222      return XML_ROLE_NONE;
1223    }
1224    break;
1225  }
1226  return common(state, tok);
1227}
1228
1229static int PTRCALL
1230condSect1(PROLOG_STATE *state,
1231          int tok,
1232          const char *ptr,
1233          const char *end,
1234          const ENCODING *enc)
1235{
1236  switch (tok) {
1237  case XML_TOK_PROLOG_S:
1238    return XML_ROLE_NONE;
1239  case XML_TOK_OPEN_BRACKET:
1240    state->handler = externalSubset1;
1241    state->includeLevel += 1;
1242    return XML_ROLE_NONE;
1243  }
1244  return common(state, tok);
1245}
1246
1247static int PTRCALL
1248condSect2(PROLOG_STATE *state,
1249          int tok,
1250          const char *ptr,
1251          const char *end,
1252          const ENCODING *enc)
1253{
1254  switch (tok) {
1255  case XML_TOK_PROLOG_S:
1256    return XML_ROLE_NONE;
1257  case XML_TOK_OPEN_BRACKET:
1258    state->handler = externalSubset1;
1259    return XML_ROLE_IGNORE_SECT;
1260  }
1261  return common(state, tok);
1262}
1263
1264#endif /* XML_DTD */
1265
1266static int PTRCALL
1267declClose(PROLOG_STATE *state,
1268          int tok,
1269          const char *ptr,
1270          const char *end,
1271          const ENCODING *enc)
1272{
1273  switch (tok) {
1274  case XML_TOK_PROLOG_S:
1275    return state->role_none;
1276  case XML_TOK_DECL_CLOSE:
1277    setTopLevel(state);
1278    return state->role_none;
1279  }
1280  return common(state, tok);
1281}
1282
1283static int PTRCALL
1284error(PROLOG_STATE *state,
1285      int tok,
1286      const char *ptr,
1287      const char *end,
1288      const ENCODING *enc)
1289{
1290  return XML_ROLE_NONE;
1291}
1292
1293static int FASTCALL
1294common(PROLOG_STATE *state, int tok)
1295{
1296#ifdef XML_DTD
1297  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1298    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1299#endif
1300  state->handler = error;
1301  return XML_ROLE_ERROR;
1302}
1303
1304void
1305XmlPrologStateInit(PROLOG_STATE *state)
1306{
1307  state->handler = prolog0;
1308#ifdef XML_DTD
1309  state->documentEntity = 1;
1310  state->includeLevel = 0;
1311  state->inEntityValue = 0;
1312#endif /* XML_DTD */
1313}
1314
1315#ifdef XML_DTD
1316
1317void
1318XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1319{
1320  state->handler = externalSubset0;
1321  state->documentEntity = 0;
1322  state->includeLevel = 0;
1323}
1324
1325#endif /* XML_DTD */
1326