1/* Read an XML document from standard input and print
2   element declarations (if any) to standard output.
3   It must be used with Expat compiled for UTF-8 output.
4                            __  __            _
5                         ___\ \/ /_ __   __ _| |_
6                        / _ \\  /| '_ \ / _` | __|
7                       |  __//  \| |_) | (_| | |_
8                        \___/_/\_\ .__/ \__,_|\__|
9                                 |_| XML parser
10
11   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12   Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13   Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
17   Copyright (c) 2019      Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18   Licensed under the MIT license:
19
20   Permission is  hereby granted,  free of charge,  to any  person obtaining
21   a  copy  of  this  software   and  associated  documentation  files  (the
22   "Software"),  to  deal in  the  Software  without restriction,  including
23   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24   distribute, sublicense, and/or sell copies of the Software, and to permit
25   persons  to whom  the Software  is  furnished to  do so,  subject to  the
26   following conditions:
27
28   The above copyright  notice and this permission notice  shall be included
29   in all copies or substantial portions of the Software.
30
31   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37   USE OR OTHER DEALINGS IN THE SOFTWARE.
38*/
39
40#include <stdbool.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <expat.h>
44
// printf length modifier used for the line number reported by
// XML_GetCurrentLineNumber: "ll" when Expat is built with
// XML_LARGE_SIZE, "l" otherwise.
#if defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "ll"
#else
#  define XML_FMT_INT_MOD "l"
#endif
50
// printf conversion used for strings handed out by Expat: "ls" (wide
// string) when XML_UNICODE_WCHAR_T is defined, plain "s" otherwise.
#if defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "ls"
#else
#  define XML_FMT_STR "s"
#endif
56
57// While traversing the XML_Content tree, we avoid recursion
58// to not be vulnerable to a denial of service attack.
59typedef struct StackStruct {
60  const XML_Content *model;
61  unsigned level;
62  struct StackStruct *prev;
63} Stack;
64
65static Stack *
66stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
67  Stack *const newStackTop = malloc(sizeof(Stack));
68  if (! newStackTop) {
69    return NULL;
70  }
71  newStackTop->model = model;
72  newStackTop->level = level;
73  newStackTop->prev = stackTop;
74  return newStackTop;
75}
76
77static Stack *
78stackPopFree(Stack *stackTop) {
79  Stack *const newStackTop = stackTop->prev;
80  free(stackTop);
81  return newStackTop;
82}
83
84static char *
85contentTypeName(enum XML_Content_Type contentType) {
86  switch (contentType) {
87  case XML_CTYPE_EMPTY:
88    return "EMPTY";
89  case XML_CTYPE_ANY:
90    return "ANY";
91  case XML_CTYPE_MIXED:
92    return "MIXED";
93  case XML_CTYPE_NAME:
94    return "NAME";
95  case XML_CTYPE_CHOICE:
96    return "CHOICE";
97  case XML_CTYPE_SEQ:
98    return "SEQ";
99  default:
100    return "???";
101  }
102}
103
104static char *
105contentQuantName(enum XML_Content_Quant contentQuant) {
106  switch (contentQuant) {
107  case XML_CQUANT_NONE:
108    return "NONE";
109  case XML_CQUANT_OPT:
110    return "OPT";
111  case XML_CQUANT_REP:
112    return "REP";
113  case XML_CQUANT_PLUS:
114    return "PLUS";
115  default:
116    return "???";
117  }
118}
119
120static void
121dumpContentModelElement(const XML_Content *model, unsigned level,
122                        const XML_Content *root) {
123  // Indent
124  unsigned u = 0;
125  for (; u < level; u++) {
126    printf("  ");
127  }
128
129  // Node
130  printf("[%u] type=%s(%d), quant=%s(%d)", (unsigned)(model - root),
131         contentTypeName(model->type), model->type,
132         contentQuantName(model->quant), model->quant);
133  if (model->name) {
134    printf(", name=\"%" XML_FMT_STR "\"", model->name);
135  } else {
136    printf(", name=NULL");
137  }
138  printf(", numchildren=%d", model->numchildren);
139  printf("\n");
140}
141
142static bool
143dumpContentModel(const XML_Char *name, const XML_Content *root) {
144  printf("Element \"%" XML_FMT_STR "\":\n", name);
145  Stack *stackTop = stackPushMalloc(NULL, root, 1);
146  if (! stackTop) {
147    return false;
148  }
149
150  while (stackTop) {
151    const XML_Content *const model = stackTop->model;
152    const unsigned level = stackTop->level;
153
154    dumpContentModelElement(model, level, root);
155
156    stackTop = stackPopFree(stackTop);
157
158    for (size_t u = model->numchildren; u >= 1; u--) {
159      Stack *const newStackTop
160          = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
161      if (! newStackTop) {
162        // We ran out of memory, so let's free all memory allocated
163        // earlier in this function, to be leak-clean:
164        while (stackTop != NULL) {
165          stackTop = stackPopFree(stackTop);
166        }
167        return false;
168      }
169      stackTop = newStackTop;
170    }
171  }
172
173  printf("\n");
174  return true;
175}
176
177static void XMLCALL
178handleElementDeclaration(void *userData, const XML_Char *name,
179                         XML_Content *model) {
180  XML_Parser parser = (XML_Parser)userData;
181  const bool success = dumpContentModel(name, model);
182  XML_FreeContentModel(parser, model);
183  if (! success) {
184    XML_StopParser(parser, /* resumable= */ XML_FALSE);
185  }
186}
187
188int
189main(void) {
190  XML_Parser parser = XML_ParserCreate(NULL);
191  int done;
192
193  if (! parser) {
194    fprintf(stderr, "Couldn't allocate memory for parser\n");
195    return 1;
196  }
197
198  XML_SetUserData(parser, parser);
199  XML_SetElementDeclHandler(parser, handleElementDeclaration);
200
201  do {
202    void *const buf = XML_GetBuffer(parser, BUFSIZ);
203    if (! buf) {
204      fprintf(stderr, "Couldn't allocate memory for buffer\n");
205      XML_ParserFree(parser);
206      return 1;
207    }
208
209    const size_t len = fread(buf, 1, BUFSIZ, stdin);
210
211    if (ferror(stdin)) {
212      fprintf(stderr, "Read error\n");
213      XML_ParserFree(parser);
214      return 1;
215    }
216
217    done = feof(stdin);
218
219    if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
220      enum XML_Error errorCode = XML_GetErrorCode(parser);
221      if (errorCode == XML_ERROR_ABORTED) {
222        errorCode = XML_ERROR_NO_MEMORY;
223      }
224      fprintf(stderr,
225              "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
226              XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
227      XML_ParserFree(parser);
228      return 1;
229    }
230  } while (! done);
231
232  XML_ParserFree(parser);
233  return 0;
234}
235