1/*
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10   Copyright (c) 2000-2017 Expat development team
11   Licensed under the MIT license:
12
13   Permission is  hereby granted,  free of charge,  to any  person obtaining
14   a  copy  of  this  software   and  associated  documentation  files  (the
15   "Software"),  to  deal in  the  Software  without restriction,  including
16   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17   distribute, sublicense, and/or sell copies of the Software, and to permit
18   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19   following conditions:
20
21   The above copyright  notice and this permission notice  shall be included
22   in all copies or substantial portions of the Software.
23
24   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30   USE OR OTHER DEALINGS IN THE SOFTWARE.
31*/
32
33#include <stddef.h>
34
35#ifdef _WIN32
36#  include "winconfig.h"
37#else
38#  ifdef HAVE_EXPAT_CONFIG_H
39#    include <expat_config.h>
40#  endif
41#endif /* ndef _WIN32 */
42
43#include "expat_external.h"
44#include "internal.h"
45#include "xmlrole.h"
46#include "ascii.h"
47
/* This module doesn't check:

 that ',' and '|' are not mixed within a single model group
 the content of literals

*/
54
55static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
56static const char KW_ATTLIST[]
57    = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
58static const char KW_CDATA[]
59    = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
60static const char KW_DOCTYPE[]
61    = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
62static const char KW_ELEMENT[]
63    = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
64static const char KW_EMPTY[]
65    = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
66static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
67                                   ASCII_I, ASCII_E, ASCII_S, '\0'};
68static const char KW_ENTITY[]
69    = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
70static const char KW_FIXED[]
71    = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
72static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
73static const char KW_IDREF[]
74    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
75static const char KW_IDREFS[]
76    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
77#ifdef XML_DTD
78static const char KW_IGNORE[]
79    = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
80#endif
81static const char KW_IMPLIED[]
82    = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
83#ifdef XML_DTD
84static const char KW_INCLUDE[]
85    = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
86#endif
87static const char KW_NDATA[]
88    = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
89static const char KW_NMTOKEN[]
90    = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
91static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
92                                   ASCII_E, ASCII_N, ASCII_S, '\0'};
93static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
94                                   ASCII_I, ASCII_O, ASCII_N, '\0'};
95static const char KW_PCDATA[]
96    = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
97static const char KW_PUBLIC[]
98    = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
99static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
100                                   ASCII_R, ASCII_E, ASCII_D, '\0'};
101static const char KW_SYSTEM[]
102    = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
103
/* Number of bytes occupied by the narrowest character of encoding `enc`.
 * A definition supplied before this point takes precedence; this fallback
 * reads the value from the encoding object.
 */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* setTopLevel(state): install the handler used between declarations.
 * Without XML_DTD that is always internalSubset.  With XML_DTD it is
 * internalSubset when state->documentEntity is non-zero and
 * externalSubset1 otherwise.
 */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
115
116typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
117                                   const char *ptr, const char *end,
118                                   const ENCODING *enc);
119
120static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
121    doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
122    entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
123    notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
124    attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
125    attlist9, element0, element1, element2, element3, element4, element5,
126    element6, element7,
127#ifdef XML_DTD
128    externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
129#endif /* XML_DTD */
130    declClose, error;
131
132static int FASTCALL common(PROLOG_STATE *state, int tok);
133
134static int PTRCALL
135prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
136        const ENCODING *enc) {
137  switch (tok) {
138  case XML_TOK_PROLOG_S:
139    state->handler = prolog1;
140    return XML_ROLE_NONE;
141  case XML_TOK_XML_DECL:
142    state->handler = prolog1;
143    return XML_ROLE_XML_DECL;
144  case XML_TOK_PI:
145    state->handler = prolog1;
146    return XML_ROLE_PI;
147  case XML_TOK_COMMENT:
148    state->handler = prolog1;
149    return XML_ROLE_COMMENT;
150  case XML_TOK_BOM:
151    return XML_ROLE_NONE;
152  case XML_TOK_DECL_OPEN:
153    if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
154                              KW_DOCTYPE))
155      break;
156    state->handler = doctype0;
157    return XML_ROLE_DOCTYPE_NONE;
158  case XML_TOK_INSTANCE_START:
159    state->handler = error;
160    return XML_ROLE_INSTANCE_START;
161  }
162  return common(state, tok);
163}
164
165static int PTRCALL
166prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
167        const ENCODING *enc) {
168  switch (tok) {
169  case XML_TOK_PROLOG_S:
170    return XML_ROLE_NONE;
171  case XML_TOK_PI:
172    return XML_ROLE_PI;
173  case XML_TOK_COMMENT:
174    return XML_ROLE_COMMENT;
175  case XML_TOK_BOM:
176    /* This case can never arise.  To reach this role function, the
177     * parse must have passed through prolog0 and therefore have had
178     * some form of input, even if only a space.  At that point, a
179     * byte order mark is no longer a valid character (though
180     * technically it should be interpreted as a non-breaking space),
181     * so will be rejected by the tokenizing stages.
182     */
183    return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
184  case XML_TOK_DECL_OPEN:
185    if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
186                              KW_DOCTYPE))
187      break;
188    state->handler = doctype0;
189    return XML_ROLE_DOCTYPE_NONE;
190  case XML_TOK_INSTANCE_START:
191    state->handler = error;
192    return XML_ROLE_INSTANCE_START;
193  }
194  return common(state, tok);
195}
196
197static int PTRCALL
198prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
199        const ENCODING *enc) {
200  UNUSED_P(ptr);
201  UNUSED_P(end);
202  UNUSED_P(enc);
203  switch (tok) {
204  case XML_TOK_PROLOG_S:
205    return XML_ROLE_NONE;
206  case XML_TOK_PI:
207    return XML_ROLE_PI;
208  case XML_TOK_COMMENT:
209    return XML_ROLE_COMMENT;
210  case XML_TOK_INSTANCE_START:
211    state->handler = error;
212    return XML_ROLE_INSTANCE_START;
213  }
214  return common(state, tok);
215}
216
217static int PTRCALL
218doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
219         const ENCODING *enc) {
220  UNUSED_P(ptr);
221  UNUSED_P(end);
222  UNUSED_P(enc);
223  switch (tok) {
224  case XML_TOK_PROLOG_S:
225    return XML_ROLE_DOCTYPE_NONE;
226  case XML_TOK_NAME:
227  case XML_TOK_PREFIXED_NAME:
228    state->handler = doctype1;
229    return XML_ROLE_DOCTYPE_NAME;
230  }
231  return common(state, tok);
232}
233
234static int PTRCALL
235doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
236         const ENCODING *enc) {
237  switch (tok) {
238  case XML_TOK_PROLOG_S:
239    return XML_ROLE_DOCTYPE_NONE;
240  case XML_TOK_OPEN_BRACKET:
241    state->handler = internalSubset;
242    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
243  case XML_TOK_DECL_CLOSE:
244    state->handler = prolog2;
245    return XML_ROLE_DOCTYPE_CLOSE;
246  case XML_TOK_NAME:
247    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
248      state->handler = doctype3;
249      return XML_ROLE_DOCTYPE_NONE;
250    }
251    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
252      state->handler = doctype2;
253      return XML_ROLE_DOCTYPE_NONE;
254    }
255    break;
256  }
257  return common(state, tok);
258}
259
260static int PTRCALL
261doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
262         const ENCODING *enc) {
263  UNUSED_P(ptr);
264  UNUSED_P(end);
265  UNUSED_P(enc);
266  switch (tok) {
267  case XML_TOK_PROLOG_S:
268    return XML_ROLE_DOCTYPE_NONE;
269  case XML_TOK_LITERAL:
270    state->handler = doctype3;
271    return XML_ROLE_DOCTYPE_PUBLIC_ID;
272  }
273  return common(state, tok);
274}
275
276static int PTRCALL
277doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
278         const ENCODING *enc) {
279  UNUSED_P(ptr);
280  UNUSED_P(end);
281  UNUSED_P(enc);
282  switch (tok) {
283  case XML_TOK_PROLOG_S:
284    return XML_ROLE_DOCTYPE_NONE;
285  case XML_TOK_LITERAL:
286    state->handler = doctype4;
287    return XML_ROLE_DOCTYPE_SYSTEM_ID;
288  }
289  return common(state, tok);
290}
291
292static int PTRCALL
293doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
294         const ENCODING *enc) {
295  UNUSED_P(ptr);
296  UNUSED_P(end);
297  UNUSED_P(enc);
298  switch (tok) {
299  case XML_TOK_PROLOG_S:
300    return XML_ROLE_DOCTYPE_NONE;
301  case XML_TOK_OPEN_BRACKET:
302    state->handler = internalSubset;
303    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
304  case XML_TOK_DECL_CLOSE:
305    state->handler = prolog2;
306    return XML_ROLE_DOCTYPE_CLOSE;
307  }
308  return common(state, tok);
309}
310
311static int PTRCALL
312doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
313         const ENCODING *enc) {
314  UNUSED_P(ptr);
315  UNUSED_P(end);
316  UNUSED_P(enc);
317  switch (tok) {
318  case XML_TOK_PROLOG_S:
319    return XML_ROLE_DOCTYPE_NONE;
320  case XML_TOK_DECL_CLOSE:
321    state->handler = prolog2;
322    return XML_ROLE_DOCTYPE_CLOSE;
323  }
324  return common(state, tok);
325}
326
327static int PTRCALL
328internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
329               const ENCODING *enc) {
330  switch (tok) {
331  case XML_TOK_PROLOG_S:
332    return XML_ROLE_NONE;
333  case XML_TOK_DECL_OPEN:
334    if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
335                            KW_ENTITY)) {
336      state->handler = entity0;
337      return XML_ROLE_ENTITY_NONE;
338    }
339    if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
340                            KW_ATTLIST)) {
341      state->handler = attlist0;
342      return XML_ROLE_ATTLIST_NONE;
343    }
344    if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
345                            KW_ELEMENT)) {
346      state->handler = element0;
347      return XML_ROLE_ELEMENT_NONE;
348    }
349    if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
350                            KW_NOTATION)) {
351      state->handler = notation0;
352      return XML_ROLE_NOTATION_NONE;
353    }
354    break;
355  case XML_TOK_PI:
356    return XML_ROLE_PI;
357  case XML_TOK_COMMENT:
358    return XML_ROLE_COMMENT;
359  case XML_TOK_PARAM_ENTITY_REF:
360    return XML_ROLE_PARAM_ENTITY_REF;
361  case XML_TOK_CLOSE_BRACKET:
362    state->handler = doctype5;
363    return XML_ROLE_DOCTYPE_NONE;
364  case XML_TOK_NONE:
365    return XML_ROLE_NONE;
366  }
367  return common(state, tok);
368}
369
370#ifdef XML_DTD
371
372static int PTRCALL
373externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
374                const ENCODING *enc) {
375  state->handler = externalSubset1;
376  if (tok == XML_TOK_XML_DECL)
377    return XML_ROLE_TEXT_DECL;
378  return externalSubset1(state, tok, ptr, end, enc);
379}
380
381static int PTRCALL
382externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
383                const ENCODING *enc) {
384  switch (tok) {
385  case XML_TOK_COND_SECT_OPEN:
386    state->handler = condSect0;
387    return XML_ROLE_NONE;
388  case XML_TOK_COND_SECT_CLOSE:
389    if (state->includeLevel == 0)
390      break;
391    state->includeLevel -= 1;
392    return XML_ROLE_NONE;
393  case XML_TOK_PROLOG_S:
394    return XML_ROLE_NONE;
395  case XML_TOK_CLOSE_BRACKET:
396    break;
397  case XML_TOK_NONE:
398    if (state->includeLevel)
399      break;
400    return XML_ROLE_NONE;
401  default:
402    return internalSubset(state, tok, ptr, end, enc);
403  }
404  return common(state, tok);
405}
406
407#endif /* XML_DTD */
408
409static int PTRCALL
410entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
411        const ENCODING *enc) {
412  UNUSED_P(ptr);
413  UNUSED_P(end);
414  UNUSED_P(enc);
415  switch (tok) {
416  case XML_TOK_PROLOG_S:
417    return XML_ROLE_ENTITY_NONE;
418  case XML_TOK_PERCENT:
419    state->handler = entity1;
420    return XML_ROLE_ENTITY_NONE;
421  case XML_TOK_NAME:
422    state->handler = entity2;
423    return XML_ROLE_GENERAL_ENTITY_NAME;
424  }
425  return common(state, tok);
426}
427
428static int PTRCALL
429entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
430        const ENCODING *enc) {
431  UNUSED_P(ptr);
432  UNUSED_P(end);
433  UNUSED_P(enc);
434  switch (tok) {
435  case XML_TOK_PROLOG_S:
436    return XML_ROLE_ENTITY_NONE;
437  case XML_TOK_NAME:
438    state->handler = entity7;
439    return XML_ROLE_PARAM_ENTITY_NAME;
440  }
441  return common(state, tok);
442}
443
444static int PTRCALL
445entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
446        const ENCODING *enc) {
447  switch (tok) {
448  case XML_TOK_PROLOG_S:
449    return XML_ROLE_ENTITY_NONE;
450  case XML_TOK_NAME:
451    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
452      state->handler = entity4;
453      return XML_ROLE_ENTITY_NONE;
454    }
455    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
456      state->handler = entity3;
457      return XML_ROLE_ENTITY_NONE;
458    }
459    break;
460  case XML_TOK_LITERAL:
461    state->handler = declClose;
462    state->role_none = XML_ROLE_ENTITY_NONE;
463    return XML_ROLE_ENTITY_VALUE;
464  }
465  return common(state, tok);
466}
467
468static int PTRCALL
469entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
470        const ENCODING *enc) {
471  UNUSED_P(ptr);
472  UNUSED_P(end);
473  UNUSED_P(enc);
474  switch (tok) {
475  case XML_TOK_PROLOG_S:
476    return XML_ROLE_ENTITY_NONE;
477  case XML_TOK_LITERAL:
478    state->handler = entity4;
479    return XML_ROLE_ENTITY_PUBLIC_ID;
480  }
481  return common(state, tok);
482}
483
484static int PTRCALL
485entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
486        const ENCODING *enc) {
487  UNUSED_P(ptr);
488  UNUSED_P(end);
489  UNUSED_P(enc);
490  switch (tok) {
491  case XML_TOK_PROLOG_S:
492    return XML_ROLE_ENTITY_NONE;
493  case XML_TOK_LITERAL:
494    state->handler = entity5;
495    return XML_ROLE_ENTITY_SYSTEM_ID;
496  }
497  return common(state, tok);
498}
499
500static int PTRCALL
501entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
502        const ENCODING *enc) {
503  switch (tok) {
504  case XML_TOK_PROLOG_S:
505    return XML_ROLE_ENTITY_NONE;
506  case XML_TOK_DECL_CLOSE:
507    setTopLevel(state);
508    return XML_ROLE_ENTITY_COMPLETE;
509  case XML_TOK_NAME:
510    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
511      state->handler = entity6;
512      return XML_ROLE_ENTITY_NONE;
513    }
514    break;
515  }
516  return common(state, tok);
517}
518
519static int PTRCALL
520entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
521        const ENCODING *enc) {
522  UNUSED_P(ptr);
523  UNUSED_P(end);
524  UNUSED_P(enc);
525  switch (tok) {
526  case XML_TOK_PROLOG_S:
527    return XML_ROLE_ENTITY_NONE;
528  case XML_TOK_NAME:
529    state->handler = declClose;
530    state->role_none = XML_ROLE_ENTITY_NONE;
531    return XML_ROLE_ENTITY_NOTATION_NAME;
532  }
533  return common(state, tok);
534}
535
536static int PTRCALL
537entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
538        const ENCODING *enc) {
539  switch (tok) {
540  case XML_TOK_PROLOG_S:
541    return XML_ROLE_ENTITY_NONE;
542  case XML_TOK_NAME:
543    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
544      state->handler = entity9;
545      return XML_ROLE_ENTITY_NONE;
546    }
547    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
548      state->handler = entity8;
549      return XML_ROLE_ENTITY_NONE;
550    }
551    break;
552  case XML_TOK_LITERAL:
553    state->handler = declClose;
554    state->role_none = XML_ROLE_ENTITY_NONE;
555    return XML_ROLE_ENTITY_VALUE;
556  }
557  return common(state, tok);
558}
559
560static int PTRCALL
561entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
562        const ENCODING *enc) {
563  UNUSED_P(ptr);
564  UNUSED_P(end);
565  UNUSED_P(enc);
566  switch (tok) {
567  case XML_TOK_PROLOG_S:
568    return XML_ROLE_ENTITY_NONE;
569  case XML_TOK_LITERAL:
570    state->handler = entity9;
571    return XML_ROLE_ENTITY_PUBLIC_ID;
572  }
573  return common(state, tok);
574}
575
576static int PTRCALL
577entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
578        const ENCODING *enc) {
579  UNUSED_P(ptr);
580  UNUSED_P(end);
581  UNUSED_P(enc);
582  switch (tok) {
583  case XML_TOK_PROLOG_S:
584    return XML_ROLE_ENTITY_NONE;
585  case XML_TOK_LITERAL:
586    state->handler = entity10;
587    return XML_ROLE_ENTITY_SYSTEM_ID;
588  }
589  return common(state, tok);
590}
591
592static int PTRCALL
593entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
594         const ENCODING *enc) {
595  UNUSED_P(ptr);
596  UNUSED_P(end);
597  UNUSED_P(enc);
598  switch (tok) {
599  case XML_TOK_PROLOG_S:
600    return XML_ROLE_ENTITY_NONE;
601  case XML_TOK_DECL_CLOSE:
602    setTopLevel(state);
603    return XML_ROLE_ENTITY_COMPLETE;
604  }
605  return common(state, tok);
606}
607
608static int PTRCALL
609notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
610          const ENCODING *enc) {
611  UNUSED_P(ptr);
612  UNUSED_P(end);
613  UNUSED_P(enc);
614  switch (tok) {
615  case XML_TOK_PROLOG_S:
616    return XML_ROLE_NOTATION_NONE;
617  case XML_TOK_NAME:
618    state->handler = notation1;
619    return XML_ROLE_NOTATION_NAME;
620  }
621  return common(state, tok);
622}
623
624static int PTRCALL
625notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
626          const ENCODING *enc) {
627  switch (tok) {
628  case XML_TOK_PROLOG_S:
629    return XML_ROLE_NOTATION_NONE;
630  case XML_TOK_NAME:
631    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
632      state->handler = notation3;
633      return XML_ROLE_NOTATION_NONE;
634    }
635    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
636      state->handler = notation2;
637      return XML_ROLE_NOTATION_NONE;
638    }
639    break;
640  }
641  return common(state, tok);
642}
643
644static int PTRCALL
645notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
646          const ENCODING *enc) {
647  UNUSED_P(ptr);
648  UNUSED_P(end);
649  UNUSED_P(enc);
650  switch (tok) {
651  case XML_TOK_PROLOG_S:
652    return XML_ROLE_NOTATION_NONE;
653  case XML_TOK_LITERAL:
654    state->handler = notation4;
655    return XML_ROLE_NOTATION_PUBLIC_ID;
656  }
657  return common(state, tok);
658}
659
660static int PTRCALL
661notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
662          const ENCODING *enc) {
663  UNUSED_P(ptr);
664  UNUSED_P(end);
665  UNUSED_P(enc);
666  switch (tok) {
667  case XML_TOK_PROLOG_S:
668    return XML_ROLE_NOTATION_NONE;
669  case XML_TOK_LITERAL:
670    state->handler = declClose;
671    state->role_none = XML_ROLE_NOTATION_NONE;
672    return XML_ROLE_NOTATION_SYSTEM_ID;
673  }
674  return common(state, tok);
675}
676
677static int PTRCALL
678notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
679          const ENCODING *enc) {
680  UNUSED_P(ptr);
681  UNUSED_P(end);
682  UNUSED_P(enc);
683  switch (tok) {
684  case XML_TOK_PROLOG_S:
685    return XML_ROLE_NOTATION_NONE;
686  case XML_TOK_LITERAL:
687    state->handler = declClose;
688    state->role_none = XML_ROLE_NOTATION_NONE;
689    return XML_ROLE_NOTATION_SYSTEM_ID;
690  case XML_TOK_DECL_CLOSE:
691    setTopLevel(state);
692    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
693  }
694  return common(state, tok);
695}
696
697static int PTRCALL
698attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
699         const ENCODING *enc) {
700  UNUSED_P(ptr);
701  UNUSED_P(end);
702  UNUSED_P(enc);
703  switch (tok) {
704  case XML_TOK_PROLOG_S:
705    return XML_ROLE_ATTLIST_NONE;
706  case XML_TOK_NAME:
707  case XML_TOK_PREFIXED_NAME:
708    state->handler = attlist1;
709    return XML_ROLE_ATTLIST_ELEMENT_NAME;
710  }
711  return common(state, tok);
712}
713
714static int PTRCALL
715attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
716         const ENCODING *enc) {
717  UNUSED_P(ptr);
718  UNUSED_P(end);
719  UNUSED_P(enc);
720  switch (tok) {
721  case XML_TOK_PROLOG_S:
722    return XML_ROLE_ATTLIST_NONE;
723  case XML_TOK_DECL_CLOSE:
724    setTopLevel(state);
725    return XML_ROLE_ATTLIST_NONE;
726  case XML_TOK_NAME:
727  case XML_TOK_PREFIXED_NAME:
728    state->handler = attlist2;
729    return XML_ROLE_ATTRIBUTE_NAME;
730  }
731  return common(state, tok);
732}
733
734static int PTRCALL
735attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
736         const ENCODING *enc) {
737  switch (tok) {
738  case XML_TOK_PROLOG_S:
739    return XML_ROLE_ATTLIST_NONE;
740  case XML_TOK_NAME: {
741    static const char *const types[] = {
742        KW_CDATA,  KW_ID,       KW_IDREF,   KW_IDREFS,
743        KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
744    };
745    int i;
746    for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
747      if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
748        state->handler = attlist8;
749        return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
750      }
751  }
752    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
753      state->handler = attlist5;
754      return XML_ROLE_ATTLIST_NONE;
755    }
756    break;
757  case XML_TOK_OPEN_PAREN:
758    state->handler = attlist3;
759    return XML_ROLE_ATTLIST_NONE;
760  }
761  return common(state, tok);
762}
763
764static int PTRCALL
765attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
766         const ENCODING *enc) {
767  UNUSED_P(ptr);
768  UNUSED_P(end);
769  UNUSED_P(enc);
770  switch (tok) {
771  case XML_TOK_PROLOG_S:
772    return XML_ROLE_ATTLIST_NONE;
773  case XML_TOK_NMTOKEN:
774  case XML_TOK_NAME:
775  case XML_TOK_PREFIXED_NAME:
776    state->handler = attlist4;
777    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
778  }
779  return common(state, tok);
780}
781
782static int PTRCALL
783attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
784         const ENCODING *enc) {
785  UNUSED_P(ptr);
786  UNUSED_P(end);
787  UNUSED_P(enc);
788  switch (tok) {
789  case XML_TOK_PROLOG_S:
790    return XML_ROLE_ATTLIST_NONE;
791  case XML_TOK_CLOSE_PAREN:
792    state->handler = attlist8;
793    return XML_ROLE_ATTLIST_NONE;
794  case XML_TOK_OR:
795    state->handler = attlist3;
796    return XML_ROLE_ATTLIST_NONE;
797  }
798  return common(state, tok);
799}
800
801static int PTRCALL
802attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
803         const ENCODING *enc) {
804  UNUSED_P(ptr);
805  UNUSED_P(end);
806  UNUSED_P(enc);
807  switch (tok) {
808  case XML_TOK_PROLOG_S:
809    return XML_ROLE_ATTLIST_NONE;
810  case XML_TOK_OPEN_PAREN:
811    state->handler = attlist6;
812    return XML_ROLE_ATTLIST_NONE;
813  }
814  return common(state, tok);
815}
816
817static int PTRCALL
818attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
819         const ENCODING *enc) {
820  UNUSED_P(ptr);
821  UNUSED_P(end);
822  UNUSED_P(enc);
823  switch (tok) {
824  case XML_TOK_PROLOG_S:
825    return XML_ROLE_ATTLIST_NONE;
826  case XML_TOK_NAME:
827    state->handler = attlist7;
828    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
829  }
830  return common(state, tok);
831}
832
833static int PTRCALL
834attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
835         const ENCODING *enc) {
836  UNUSED_P(ptr);
837  UNUSED_P(end);
838  UNUSED_P(enc);
839  switch (tok) {
840  case XML_TOK_PROLOG_S:
841    return XML_ROLE_ATTLIST_NONE;
842  case XML_TOK_CLOSE_PAREN:
843    state->handler = attlist8;
844    return XML_ROLE_ATTLIST_NONE;
845  case XML_TOK_OR:
846    state->handler = attlist6;
847    return XML_ROLE_ATTLIST_NONE;
848  }
849  return common(state, tok);
850}
851
852/* default value */
853static int PTRCALL
854attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
855         const ENCODING *enc) {
856  switch (tok) {
857  case XML_TOK_PROLOG_S:
858    return XML_ROLE_ATTLIST_NONE;
859  case XML_TOK_POUND_NAME:
860    if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
861                            KW_IMPLIED)) {
862      state->handler = attlist1;
863      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
864    }
865    if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
866                            KW_REQUIRED)) {
867      state->handler = attlist1;
868      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
869    }
870    if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
871                            KW_FIXED)) {
872      state->handler = attlist9;
873      return XML_ROLE_ATTLIST_NONE;
874    }
875    break;
876  case XML_TOK_LITERAL:
877    state->handler = attlist1;
878    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
879  }
880  return common(state, tok);
881}
882
883static int PTRCALL
884attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
885         const ENCODING *enc) {
886  UNUSED_P(ptr);
887  UNUSED_P(end);
888  UNUSED_P(enc);
889  switch (tok) {
890  case XML_TOK_PROLOG_S:
891    return XML_ROLE_ATTLIST_NONE;
892  case XML_TOK_LITERAL:
893    state->handler = attlist1;
894    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
895  }
896  return common(state, tok);
897}
898
899static int PTRCALL
900element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
901         const ENCODING *enc) {
902  UNUSED_P(ptr);
903  UNUSED_P(end);
904  UNUSED_P(enc);
905  switch (tok) {
906  case XML_TOK_PROLOG_S:
907    return XML_ROLE_ELEMENT_NONE;
908  case XML_TOK_NAME:
909  case XML_TOK_PREFIXED_NAME:
910    state->handler = element1;
911    return XML_ROLE_ELEMENT_NAME;
912  }
913  return common(state, tok);
914}
915
916static int PTRCALL
917element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
918         const ENCODING *enc) {
919  switch (tok) {
920  case XML_TOK_PROLOG_S:
921    return XML_ROLE_ELEMENT_NONE;
922  case XML_TOK_NAME:
923    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
924      state->handler = declClose;
925      state->role_none = XML_ROLE_ELEMENT_NONE;
926      return XML_ROLE_CONTENT_EMPTY;
927    }
928    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
929      state->handler = declClose;
930      state->role_none = XML_ROLE_ELEMENT_NONE;
931      return XML_ROLE_CONTENT_ANY;
932    }
933    break;
934  case XML_TOK_OPEN_PAREN:
935    state->handler = element2;
936    state->level = 1;
937    return XML_ROLE_GROUP_OPEN;
938  }
939  return common(state, tok);
940}
941
942static int PTRCALL
943element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
944         const ENCODING *enc) {
945  switch (tok) {
946  case XML_TOK_PROLOG_S:
947    return XML_ROLE_ELEMENT_NONE;
948  case XML_TOK_POUND_NAME:
949    if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
950                            KW_PCDATA)) {
951      state->handler = element3;
952      return XML_ROLE_CONTENT_PCDATA;
953    }
954    break;
955  case XML_TOK_OPEN_PAREN:
956    state->level = 2;
957    state->handler = element6;
958    return XML_ROLE_GROUP_OPEN;
959  case XML_TOK_NAME:
960  case XML_TOK_PREFIXED_NAME:
961    state->handler = element7;
962    return XML_ROLE_CONTENT_ELEMENT;
963  case XML_TOK_NAME_QUESTION:
964    state->handler = element7;
965    return XML_ROLE_CONTENT_ELEMENT_OPT;
966  case XML_TOK_NAME_ASTERISK:
967    state->handler = element7;
968    return XML_ROLE_CONTENT_ELEMENT_REP;
969  case XML_TOK_NAME_PLUS:
970    state->handler = element7;
971    return XML_ROLE_CONTENT_ELEMENT_PLUS;
972  }
973  return common(state, tok);
974}
975
976static int PTRCALL
977element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
978         const ENCODING *enc) {
979  UNUSED_P(ptr);
980  UNUSED_P(end);
981  UNUSED_P(enc);
982  switch (tok) {
983  case XML_TOK_PROLOG_S:
984    return XML_ROLE_ELEMENT_NONE;
985  case XML_TOK_CLOSE_PAREN:
986    state->handler = declClose;
987    state->role_none = XML_ROLE_ELEMENT_NONE;
988    return XML_ROLE_GROUP_CLOSE;
989  case XML_TOK_CLOSE_PAREN_ASTERISK:
990    state->handler = declClose;
991    state->role_none = XML_ROLE_ELEMENT_NONE;
992    return XML_ROLE_GROUP_CLOSE_REP;
993  case XML_TOK_OR:
994    state->handler = element4;
995    return XML_ROLE_ELEMENT_NONE;
996  }
997  return common(state, tok);
998}
999
1000static int PTRCALL
1001element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1002         const ENCODING *enc) {
1003  UNUSED_P(ptr);
1004  UNUSED_P(end);
1005  UNUSED_P(enc);
1006  switch (tok) {
1007  case XML_TOK_PROLOG_S:
1008    return XML_ROLE_ELEMENT_NONE;
1009  case XML_TOK_NAME:
1010  case XML_TOK_PREFIXED_NAME:
1011    state->handler = element5;
1012    return XML_ROLE_CONTENT_ELEMENT;
1013  }
1014  return common(state, tok);
1015}
1016
1017static int PTRCALL
1018element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1019         const ENCODING *enc) {
1020  UNUSED_P(ptr);
1021  UNUSED_P(end);
1022  UNUSED_P(enc);
1023  switch (tok) {
1024  case XML_TOK_PROLOG_S:
1025    return XML_ROLE_ELEMENT_NONE;
1026  case XML_TOK_CLOSE_PAREN_ASTERISK:
1027    state->handler = declClose;
1028    state->role_none = XML_ROLE_ELEMENT_NONE;
1029    return XML_ROLE_GROUP_CLOSE_REP;
1030  case XML_TOK_OR:
1031    state->handler = element4;
1032    return XML_ROLE_ELEMENT_NONE;
1033  }
1034  return common(state, tok);
1035}
1036
1037static int PTRCALL
1038element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1039         const ENCODING *enc) {
1040  UNUSED_P(ptr);
1041  UNUSED_P(end);
1042  UNUSED_P(enc);
1043  switch (tok) {
1044  case XML_TOK_PROLOG_S:
1045    return XML_ROLE_ELEMENT_NONE;
1046  case XML_TOK_OPEN_PAREN:
1047    state->level += 1;
1048    return XML_ROLE_GROUP_OPEN;
1049  case XML_TOK_NAME:
1050  case XML_TOK_PREFIXED_NAME:
1051    state->handler = element7;
1052    return XML_ROLE_CONTENT_ELEMENT;
1053  case XML_TOK_NAME_QUESTION:
1054    state->handler = element7;
1055    return XML_ROLE_CONTENT_ELEMENT_OPT;
1056  case XML_TOK_NAME_ASTERISK:
1057    state->handler = element7;
1058    return XML_ROLE_CONTENT_ELEMENT_REP;
1059  case XML_TOK_NAME_PLUS:
1060    state->handler = element7;
1061    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1062  }
1063  return common(state, tok);
1064}
1065
1066static int PTRCALL
1067element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1068         const ENCODING *enc) {
1069  UNUSED_P(ptr);
1070  UNUSED_P(end);
1071  UNUSED_P(enc);
1072  switch (tok) {
1073  case XML_TOK_PROLOG_S:
1074    return XML_ROLE_ELEMENT_NONE;
1075  case XML_TOK_CLOSE_PAREN:
1076    state->level -= 1;
1077    if (state->level == 0) {
1078      state->handler = declClose;
1079      state->role_none = XML_ROLE_ELEMENT_NONE;
1080    }
1081    return XML_ROLE_GROUP_CLOSE;
1082  case XML_TOK_CLOSE_PAREN_ASTERISK:
1083    state->level -= 1;
1084    if (state->level == 0) {
1085      state->handler = declClose;
1086      state->role_none = XML_ROLE_ELEMENT_NONE;
1087    }
1088    return XML_ROLE_GROUP_CLOSE_REP;
1089  case XML_TOK_CLOSE_PAREN_QUESTION:
1090    state->level -= 1;
1091    if (state->level == 0) {
1092      state->handler = declClose;
1093      state->role_none = XML_ROLE_ELEMENT_NONE;
1094    }
1095    return XML_ROLE_GROUP_CLOSE_OPT;
1096  case XML_TOK_CLOSE_PAREN_PLUS:
1097    state->level -= 1;
1098    if (state->level == 0) {
1099      state->handler = declClose;
1100      state->role_none = XML_ROLE_ELEMENT_NONE;
1101    }
1102    return XML_ROLE_GROUP_CLOSE_PLUS;
1103  case XML_TOK_COMMA:
1104    state->handler = element6;
1105    return XML_ROLE_GROUP_SEQUENCE;
1106  case XML_TOK_OR:
1107    state->handler = element6;
1108    return XML_ROLE_GROUP_CHOICE;
1109  }
1110  return common(state, tok);
1111}
1112
1113#ifdef XML_DTD
1114
1115static int PTRCALL
1116condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1117          const ENCODING *enc) {
1118  switch (tok) {
1119  case XML_TOK_PROLOG_S:
1120    return XML_ROLE_NONE;
1121  case XML_TOK_NAME:
1122    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1123      state->handler = condSect1;
1124      return XML_ROLE_NONE;
1125    }
1126    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1127      state->handler = condSect2;
1128      return XML_ROLE_NONE;
1129    }
1130    break;
1131  }
1132  return common(state, tok);
1133}
1134
1135static int PTRCALL
1136condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1137          const ENCODING *enc) {
1138  UNUSED_P(ptr);
1139  UNUSED_P(end);
1140  UNUSED_P(enc);
1141  switch (tok) {
1142  case XML_TOK_PROLOG_S:
1143    return XML_ROLE_NONE;
1144  case XML_TOK_OPEN_BRACKET:
1145    state->handler = externalSubset1;
1146    state->includeLevel += 1;
1147    return XML_ROLE_NONE;
1148  }
1149  return common(state, tok);
1150}
1151
1152static int PTRCALL
1153condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1154          const ENCODING *enc) {
1155  UNUSED_P(ptr);
1156  UNUSED_P(end);
1157  UNUSED_P(enc);
1158  switch (tok) {
1159  case XML_TOK_PROLOG_S:
1160    return XML_ROLE_NONE;
1161  case XML_TOK_OPEN_BRACKET:
1162    state->handler = externalSubset1;
1163    return XML_ROLE_IGNORE_SECT;
1164  }
1165  return common(state, tok);
1166}
1167
1168#endif /* XML_DTD */
1169
1170static int PTRCALL
1171declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1172          const ENCODING *enc) {
1173  UNUSED_P(ptr);
1174  UNUSED_P(end);
1175  UNUSED_P(enc);
1176  switch (tok) {
1177  case XML_TOK_PROLOG_S:
1178    return state->role_none;
1179  case XML_TOK_DECL_CLOSE:
1180    setTopLevel(state);
1181    return state->role_none;
1182  }
1183  return common(state, tok);
1184}
1185
1186/* This function will only be invoked if the internal logic of the
1187 * parser has broken down.  It is used in two cases:
1188 *
1189 * 1: When the XML prolog has been finished.  At this point the
1190 * processor (the parser level above these role handlers) should
1191 * switch from prologProcessor to contentProcessor and reinitialise
1192 * the handler function.
1193 *
1194 * 2: When an error has been detected (via common() below).  At this
1195 * point again the processor should be switched to errorProcessor,
1196 * which will never call a handler.
1197 *
1198 * The result of this is that error() can only be called if the
1199 * processor switch failed to happen, which is an internal error and
1200 * therefore we shouldn't be able to provoke it simply by using the
1201 * library.  It is a necessary backstop, however, so we merely exclude
1202 * it from the coverage statistics.
1203 *
1204 * LCOV_EXCL_START
1205 */
1206static int PTRCALL
1207error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1208      const ENCODING *enc) {
1209  UNUSED_P(state);
1210  UNUSED_P(tok);
1211  UNUSED_P(ptr);
1212  UNUSED_P(end);
1213  UNUSED_P(enc);
1214  return XML_ROLE_NONE;
1215}
1216/* LCOV_EXCL_STOP */
1217
1218static int FASTCALL
1219common(PROLOG_STATE *state, int tok) {
1220#ifdef XML_DTD
1221  if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1222    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1223#endif
1224  state->handler = error;
1225  return XML_ROLE_ERROR;
1226}
1227
1228void
1229XmlPrologStateInit(PROLOG_STATE *state) {
1230  state->handler = prolog0;
1231#ifdef XML_DTD
1232  state->documentEntity = 1;
1233  state->includeLevel = 0;
1234  state->inEntityValue = 0;
1235#endif /* XML_DTD */
1236}
1237
1238#ifdef XML_DTD
1239
1240void
1241XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1242  state->handler = externalSubset0;
1243  state->documentEntity = 0;
1244  state->includeLevel = 0;
1245}
1246
1247#endif /* XML_DTD */
1248