1/*
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11   Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12   Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14   Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
15   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17   Copyright (c) 2021      Dong-hee Na <donghee.na@python.org>
18   Licensed under the MIT license:
19
20   Permission is  hereby granted,  free of charge,  to any  person obtaining
21   a  copy  of  this  software   and  associated  documentation  files  (the
22   "Software"),  to  deal in  the  Software  without restriction,  including
23   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24   distribute, sublicense, and/or sell copies of the Software, and to permit
25   persons  to whom  the Software  is  furnished to  do so,  subject to  the
26   following conditions:
27
28   The above copyright  notice and this permission notice  shall be included
29   in all copies or substantial portions of the Software.
30
31   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37   USE OR OTHER DEALINGS IN THE SOFTWARE.
38*/
39
40#include <expat_config.h>
41
42#include <stdio.h>
43#include <stdlib.h>
44#include <stddef.h>
45#include <string.h>
46#include <fcntl.h>
47
48#ifdef _WIN32
49#  include "winconfig.h"
50#endif
51
52#include "expat.h"
53#include "internal.h" /* for UNUSED_P only */
54#include "xmlfile.h"
55#include "xmltchar.h"
56#include "filemap.h"
57
58#if defined(_MSC_VER)
59#  include <io.h>
60#endif
61
62#ifdef HAVE_UNISTD_H
63#  include <unistd.h>
64#endif
65
/* Guarantee that O_BINARY can be OR-ed into open() flags everywhere: keep
   an existing definition, otherwise reuse _O_BINARY where only that
   spelling is provided, otherwise expand to 0 so it has no effect. */
#if ! defined(O_BINARY)
#  if defined(_O_BINARY)
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
73
/* Number of bytes requested per XML_GetBuffer()/read() round trip in
   processStream().  Builds that define _DEBUG use a tiny value
   (presumably to exercise chunk-boundary handling -- confirm). */
#if ! defined(_DEBUG)
#  define READ_SIZE (1024 * 8)
#else
#  define READ_SIZE 16
#endif
79
80typedef struct {
81  XML_Parser parser;
82  int *retPtr;
83} PROCESS_ARGS;
84
85static int processStream(const XML_Char *filename, XML_Parser parser);
86
87static void
88reportError(XML_Parser parser, const XML_Char *filename) {
89  enum XML_Error code = XML_GetErrorCode(parser);
90  const XML_Char *message = XML_ErrorString(code);
91  if (message)
92    ftprintf(stdout,
93             T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
94                 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
95             filename, XML_GetErrorLineNumber(parser),
96             XML_GetErrorColumnNumber(parser), message);
97  else
98    ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
99}
100
101/* This implementation will give problems on files larger than INT_MAX. */
102static void
103processFile(const void *data, size_t size, const XML_Char *filename,
104            void *args) {
105  XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
106  int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
107  if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
108    reportError(parser, filename);
109    *retPtr = 0;
110  } else
111    *retPtr = 1;
112}
113
114#if defined(_WIN32)
115
116static int
117isAsciiLetter(XML_Char c) {
118  return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
119}
120
121#endif /* _WIN32 */
122
123static const XML_Char *
124resolveSystemId(const XML_Char *base, const XML_Char *systemId,
125                XML_Char **toFree) {
126  XML_Char *s;
127  *toFree = 0;
128  if (! base || *systemId == T('/')
129#if defined(_WIN32)
130      || *systemId == T('\\')
131      || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
132#endif
133  )
134    return systemId;
135  *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
136                               * sizeof(XML_Char));
137  if (! *toFree)
138    return systemId;
139  tcscpy(*toFree, base);
140  s = *toFree;
141  if (tcsrchr(s, T('/')))
142    s = tcsrchr(s, T('/')) + 1;
143#if defined(_WIN32)
144  if (tcsrchr(s, T('\\')))
145    s = tcsrchr(s, T('\\')) + 1;
146#endif
147  tcscpy(s, systemId);
148  return *toFree;
149}
150
151static int
152externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
153                         const XML_Char *base, const XML_Char *systemId,
154                         const XML_Char *publicId) {
155  int result;
156  XML_Char *s;
157  const XML_Char *filename;
158  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
159  int filemapRes;
160  PROCESS_ARGS args;
161  UNUSED_P(publicId);
162  args.retPtr = &result;
163  args.parser = entParser;
164  filename = resolveSystemId(base, systemId, &s);
165  XML_SetBase(entParser, filename);
166  filemapRes = filemap(filename, processFile, &args);
167  switch (filemapRes) {
168  case 0:
169    result = 0;
170    break;
171  case 2:
172    ftprintf(stderr,
173             T("%s: file too large for memory-mapping")
174                 T(", switching to streaming\n"),
175             filename);
176    result = processStream(filename, entParser);
177    break;
178  }
179  free(s);
180  XML_ParserFree(entParser);
181  return result;
182}
183
184static int
185processStream(const XML_Char *filename, XML_Parser parser) {
186  /* passing NULL for filename means read input from stdin */
187  int fd = 0; /* 0 is the fileno for stdin */
188
189  if (filename != NULL) {
190    fd = topen(filename, O_BINARY | O_RDONLY);
191    if (fd < 0) {
192      tperror(filename);
193      return 0;
194    }
195  }
196  for (;;) {
197    int nread;
198    char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
199    if (! buf) {
200      if (filename != NULL)
201        close(fd);
202      ftprintf(stderr, T("%s: out of memory\n"),
203               filename != NULL ? filename : T("xmlwf"));
204      return 0;
205    }
206    nread = read(fd, buf, READ_SIZE);
207    if (nread < 0) {
208      tperror(filename != NULL ? filename : T("STDIN"));
209      if (filename != NULL)
210        close(fd);
211      return 0;
212    }
213    if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
214      reportError(parser, filename != NULL ? filename : T("STDIN"));
215      if (filename != NULL)
216        close(fd);
217      return 0;
218    }
219    if (nread == 0) {
220      if (filename != NULL)
221        close(fd);
222      break;
223      ;
224    }
225  }
226  return 1;
227}
228
229static int
230externalEntityRefStream(XML_Parser parser, const XML_Char *context,
231                        const XML_Char *base, const XML_Char *systemId,
232                        const XML_Char *publicId) {
233  XML_Char *s;
234  const XML_Char *filename;
235  int ret;
236  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
237  UNUSED_P(publicId);
238  filename = resolveSystemId(base, systemId, &s);
239  XML_SetBase(entParser, filename);
240  ret = processStream(filename, entParser);
241  free(s);
242  XML_ParserFree(entParser);
243  return ret;
244}
245
246int
247XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
248  int result;
249
250  if (! XML_SetBase(parser, filename)) {
251    ftprintf(stderr, T("%s: out of memory"), filename);
252    exit(1);
253  }
254
255  if (flags & XML_EXTERNAL_ENTITIES)
256    XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
257                                                ? externalEntityRefFilemap
258                                                : externalEntityRefStream);
259  if (flags & XML_MAP_FILE) {
260    int filemapRes;
261    PROCESS_ARGS args;
262    args.retPtr = &result;
263    args.parser = parser;
264    filemapRes = filemap(filename, processFile, &args);
265    switch (filemapRes) {
266    case 0:
267      result = 0;
268      break;
269    case 2:
270      ftprintf(stderr,
271               T("%s: file too large for memory-mapping")
272                   T(", switching to streaming\n"),
273               filename);
274      result = processStream(filename, parser);
275      break;
276    }
277  } else
278    result = processStream(filename, parser);
279  return result;
280}
281