1/*
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11   Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12   Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14   Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
15   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
18   Licensed under the MIT license:
19
20   Permission is  hereby granted,  free of charge,  to any  person obtaining
21   a  copy  of  this  software   and  associated  documentation  files  (the
22   "Software"),  to  deal in  the  Software  without restriction,  including
23   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24   distribute, sublicense, and/or sell copies of the Software, and to permit
25   persons  to whom  the Software  is  furnished to  do so,  subject to  the
26   following conditions:
27
28   The above copyright  notice and this permission notice  shall be included
29   in all copies or substantial portions of the Software.
30
31   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37   USE OR OTHER DEALINGS IN THE SOFTWARE.
38*/
39
40#include "expat_config.h"
41
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include <fcntl.h>
47
48#ifdef _WIN32
49#  include "winconfig.h"
50#endif
51
52#include "expat.h"
53#include "internal.h" /* for UNUSED_P only */
54#include "xmlfile.h"
55#include "xmltchar.h"
56#include "filemap.h"
57
58#if defined(_MSC_VER)
59#  include <io.h>
60#endif
61
62#ifdef HAVE_UNISTD_H
63#  include <unistd.h>
64#endif
65
66#ifndef O_BINARY
67#  ifdef _O_BINARY
68#    define O_BINARY _O_BINARY
69#  else
70#    define O_BINARY 0
71#  endif
72#endif
73
/* Number of bytes requested per XML_GetBuffer()/read() round trip when
   input is streamed (8 KiB by default). */
int g_read_size_bytes = 8 * 1024;
75
76typedef struct {
77  XML_Parser parser;
78  int *retPtr;
79} PROCESS_ARGS;
80
81static int processStream(const XML_Char *filename, XML_Parser parser);
82
83static void
84reportError(XML_Parser parser, const XML_Char *filename) {
85  enum XML_Error code = XML_GetErrorCode(parser);
86  const XML_Char *message = XML_ErrorString(code);
87  if (message)
88    ftprintf(stdout,
89             T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
90                 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
91             filename, XML_GetErrorLineNumber(parser),
92             XML_GetErrorColumnNumber(parser), message);
93  else
94    ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
95}
96
97/* This implementation will give problems on files larger than INT_MAX. */
98static void
99processFile(const void *data, size_t size, const XML_Char *filename,
100            void *args) {
101  XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
102  int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
103  if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
104    reportError(parser, filename);
105    *retPtr = 0;
106  } else
107    *retPtr = 1;
108}
109
110#if defined(_WIN32)
111
112static int
113isAsciiLetter(XML_Char c) {
114  return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
115}
116
117#endif /* _WIN32 */
118
119static const XML_Char *
120resolveSystemId(const XML_Char *base, const XML_Char *systemId,
121                XML_Char **toFree) {
122  XML_Char *s;
123  *toFree = 0;
124  if (! base || *systemId == T('/')
125#if defined(_WIN32)
126      || *systemId == T('\\')
127      || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
128#endif
129  )
130    return systemId;
131  *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
132                               * sizeof(XML_Char));
133  if (! *toFree)
134    return systemId;
135  tcscpy(*toFree, base);
136  s = *toFree;
137  if (tcsrchr(s, T('/')))
138    s = tcsrchr(s, T('/')) + 1;
139#if defined(_WIN32)
140  if (tcsrchr(s, T('\\')))
141    s = tcsrchr(s, T('\\')) + 1;
142#endif
143  tcscpy(s, systemId);
144  return *toFree;
145}
146
147static int
148externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
149                         const XML_Char *base, const XML_Char *systemId,
150                         const XML_Char *publicId) {
151  int result;
152  XML_Char *s;
153  const XML_Char *filename;
154  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
155  int filemapRes;
156  PROCESS_ARGS args;
157  UNUSED_P(publicId);
158  args.retPtr = &result;
159  args.parser = entParser;
160  filename = resolveSystemId(base, systemId, &s);
161  XML_SetBase(entParser, filename);
162  filemapRes = filemap(filename, processFile, &args);
163  switch (filemapRes) {
164  case 0:
165    result = 0;
166    break;
167  case 2:
168    ftprintf(stderr,
169             T("%s: file too large for memory-mapping")
170                 T(", switching to streaming\n"),
171             filename);
172    result = processStream(filename, entParser);
173    break;
174  }
175  free(s);
176  XML_ParserFree(entParser);
177  return result;
178}
179
180static int
181processStream(const XML_Char *filename, XML_Parser parser) {
182  /* passing NULL for filename means read input from stdin */
183  int fd = 0; /* 0 is the fileno for stdin */
184
185  if (filename != NULL) {
186    fd = topen(filename, O_BINARY | O_RDONLY);
187    if (fd < 0) {
188      tperror(filename);
189      return 0;
190    }
191  }
192  for (;;) {
193    int nread;
194    char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
195    if (! buf) {
196      if (filename != NULL)
197        close(fd);
198      ftprintf(stderr, T("%s: out of memory\n"),
199               filename != NULL ? filename : T("xmlwf"));
200      return 0;
201    }
202    nread = read(fd, buf, g_read_size_bytes);
203    if (nread < 0) {
204      tperror(filename != NULL ? filename : T("STDIN"));
205      if (filename != NULL)
206        close(fd);
207      return 0;
208    }
209    if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
210      reportError(parser, filename != NULL ? filename : T("STDIN"));
211      if (filename != NULL)
212        close(fd);
213      return 0;
214    }
215    if (nread == 0) {
216      if (filename != NULL)
217        close(fd);
218      break;
219      ;
220    }
221  }
222  return 1;
223}
224
225static int
226externalEntityRefStream(XML_Parser parser, const XML_Char *context,
227                        const XML_Char *base, const XML_Char *systemId,
228                        const XML_Char *publicId) {
229  XML_Char *s;
230  const XML_Char *filename;
231  int ret;
232  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
233  UNUSED_P(publicId);
234  filename = resolveSystemId(base, systemId, &s);
235  XML_SetBase(entParser, filename);
236  ret = processStream(filename, entParser);
237  free(s);
238  XML_ParserFree(entParser);
239  return ret;
240}
241
242int
243XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
244  int result;
245
246  if (! XML_SetBase(parser, filename)) {
247    ftprintf(stderr, T("%s: out of memory"), filename);
248    exit(1);
249  }
250
251  if (flags & XML_EXTERNAL_ENTITIES)
252    XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
253                                                ? externalEntityRefFilemap
254                                                : externalEntityRefStream);
255  if (flags & XML_MAP_FILE) {
256    int filemapRes;
257    PROCESS_ARGS args;
258    args.retPtr = &result;
259    args.parser = parser;
260    filemapRes = filemap(filename, processFile, &args);
261    switch (filemapRes) {
262    case 0:
263      result = 0;
264      break;
265    case 2:
266      ftprintf(stderr,
267               T("%s: file too large for memory-mapping")
268                   T(", switching to streaming\n"),
269               filename);
270      result = processStream(filename, parser);
271      break;
272    }
273  } else
274    result = processStream(filename, parser);
275  return result;
276}
277