1/*
2The contents of this file are subject to the Mozilla Public License
3Version 1.0 (the "License"); you may not use this file except in
4compliance with the License. You may obtain a copy of the License at
5http://www.mozilla.org/MPL/
6
7Software distributed under the License is distributed on an "AS IS"
8basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9License for the specific language governing rights and limitations
10under the License.
11
12The Original Code is expat.
13
14The Initial Developer of the Original Code is James Clark.
15Portions created by James Clark are Copyright (C) 1998
16James Clark. All Rights Reserved.
17
18Contributor(s):
19*/
20
21#include <stdlib.h>
22#include <string.h>
23#include <stddef.h>
24
25#include "xmldef.h"
26
/* Map the generic conversion/encoding names used in this file onto the
   UTF-16 variants (when XML_UNICODE is defined) or the UTF-8 variants
   (otherwise).  ICHAR is the unit type handed to XmlConvert as output. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlEncode XmlUtf16Encode
/* Conversion is required unless the encoding reports isUtf16 and the
   address s has its low bit clear.  The argument is parenthesized so that
   the cast applies to the whole expression passed in, not just its first
   operand. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlEncode XmlUtf8Encode
/* Conversion is required unless the encoding reports isUtf8; s is unused. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
42
/* XML_T(x) wraps a character or string literal: with XML_UNICODE_WCHAR_T
   it pastes an L prefix onto the literal (making it a wide literal),
   otherwise it leaves the literal unchanged. */
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
#define XML_T(x) x
#endif
48
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Works by adding sz - 1 and then clearing the low bits with the mask
   ~(sz - 1); the result is only meaningful when sz is a power of 2.
   Both arguments are evaluated more than once. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
51
52#include "xmlparse.h"
53#include "xmltok.h"
54#include "xmlrole.h"
55#include "hashtable.h"
56
/* Initial sizes for the parser's growable buffers. */
#define INIT_TAG_BUF_SIZE 32  /* bytes in a new TAG buffer; must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024 /* XML_Chars in dataBuf (see XML_ParserCreate) */
#define INIT_ATTS_SIZE 16 /* ATTRIBUTE slots in atts (see XML_ParserCreate) */
#define INIT_BLOCK_SIZE 1024 /* NOTE(review): presumably the first STRING_POOL block size - confirm in poolGrow */
#define INIT_BUFFER_SIZE 1024 /* starting point for XML_GetBuffer's doubling when no buffer exists */
62
/* One entry per open element.  Entries are chained through parent, both
   on the open-element stack (tagStack) and on the recycle list
   (freeTagList). */
typedef struct tag {
  struct tag *parent;       /* next entry in whichever list holds this tag */
  const char *rawName;      /* element name in the input encoding; points into
                               the input or into buf */
  size_t rawNameLength;     /* length of rawName in bytes */
  const XML_Char *name;     /* converted name stored in buf, or 0 when no
                               start-element handler was set */
  char *buf;                /* heap buffer owned by this tag */
  char *bufEnd;             /* one past the end of buf */
} TAG;
71
/* A general entity looked up by name in dtd.generalEntities. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;   /* replacement text of an internal entity; the
                                text spans textPtr .. textPtr + textLen */
  int textLen;               /* length of the replacement text in XML_Chars */
  const XML_Char *systemId;  /* passed to the external entity ref handler */
  const XML_Char *base;
  const XML_Char *publicId;  /* passed to the external entity ref handler */
  const XML_Char *notation;  /* non-null makes a content reference an
                                XML_ERROR_BINARY_ENTITY_REF */
  char open;                 /* set while the entity is being expanded, to
                                detect recursive references */
} ENTITY;
82
/* One storage block of a STRING_POOL; blocks are chained through next. */
typedef struct block {
  struct block *next;
  int size;        /* NOTE(review): presumably the capacity of s in XML_Chars - confirm in poolGrow */
  XML_Char s[1];   /* NOTE(review): presumably over-allocated so s acts as a
                      variable-length array - confirm in poolGrow */
} BLOCK;
88
/* A growable string store.  The string under construction occupies
   start .. ptr; the pool* macros below operate on these fields. */
typedef struct {
  BLOCK *blocks;        /* blocks currently in use */
  BLOCK *freeBlocks;    /* blocks kept for reuse */
  const XML_Char *end;  /* append limit: poolAppendChar grows the pool when
                           ptr reaches end */
  XML_Char *ptr;        /* next position to write */
  XML_Char *start;      /* beginning of the string under construction */
} STRING_POOL;
96
/* Identifies an attribute name.  The XML_Char stored immediately before
   name (i.e. name[-1]) is used to determine whether an attribute has
   been specified. */
typedef struct {
  XML_Char *name;
  char maybeTokenized;  /* NOTE(review): presumably set when the attribute may
                           have a tokenized (non-CDATA) type - confirm */
} ATTRIBUTE_ID;
103
/* A default value declared for one attribute of an element type. */
typedef struct {
  const ATTRIBUTE_ID *id;  /* which attribute */
  char isCdata;            /* non-zero when the attribute is of type CDATA */
  const XML_Char *value;   /* the default value */
} DEFAULT_ATTRIBUTE;
109
/* An element type together with its growable array of default attributes. */
typedef struct {
  const XML_Char *name;
  int nDefaultAtts;               /* entries of defaultAtts in use */
  int allocDefaultAtts;           /* entries of defaultAtts allocated */
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
116
/* Everything the parser has learned from the document type declaration. */
typedef struct {
  HASH_TABLE generalEntities;  /* ENTITY records, looked up by name */
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  STRING_POOL pool;            /* storage for strings referenced by the DTD */
  int complete;                /* with complete or standalone set, a reference
                                  to an unknown entity is an error */
  int standalone;
  const XML_Char *base;        /* set by XML_SetBase, returned by XML_GetBase */
} DTD;
126
/* Type of the state functions stored in the parser's processor field.
   A processor consumes input in [start, end).  When endPtr is non-null
   the processor stores through it the position at which it stopped, so
   the caller can retain unconsumed bytes; callers pass a null endPtr for
   the final piece of input. */
typedef enum XML_Error Processor(XML_Parser parser,
				 const char *start,
				 const char *end,
				 const char **endPtr);
131
/* Processors: the parser's processor field points at one of these. */
static Processor prologProcessor;
static Processor prologInitProcessor;
static Processor contentProcessor;
static Processor cdataSectionProcessor;
static Processor epilogProcessor;
static Processor errorProcessor;
static Processor externalEntityInitProcessor;
static Processor externalEntityInitProcessor2;
static Processor externalEntityInitProcessor3;
static Processor externalEntityContentProcessor;

/* Helpers used by the processors. */
static enum XML_Error
handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
static enum XML_Error
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
static enum XML_Error
initializeEncoding(XML_Parser parser);
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
	  const char *start, const char *end, const char **endPtr);
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const XML_Char *tagName, const char *s);
static int
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
		    STRING_POOL *);
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
		    STRING_POOL *);
static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
static enum XML_Error
storeEntityValue(XML_Parser parser, const char *start, const char *end);
static int
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
static void
reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);

/* Open-entity bookkeeping, DTD lifetime and string-pool primitives. */
static const XML_Char *getOpenEntityNames(XML_Parser parser);
static int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames);
static void normalizePublicId(XML_Char *s);
static int dtdInit(DTD *);
static void dtdDestroy(DTD *);
static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
static void poolInit(STRING_POOL *);
static void poolClear(STRING_POOL *);
static void poolDestroy(STRING_POOL *);
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
			    const char *ptr, const char *end);
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
				  const char *ptr, const char *end);
static int poolGrow(STRING_POOL *pool);
static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
188
/* Inline accessors for a STRING_POOL.  Each takes a pointer to the pool.
   Every macro argument is parenthesized so that an expression such as
   &somePool can be passed to any of them. */

/* Beginning of the string under construction. */
#define poolStart(pool) ((pool)->start)
/* One past the last character written so far. */
#define poolEnd(pool) ((pool)->ptr)
/* Number of XML_Chars in the string under construction. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character written. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The last character written. */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keep the string under construction and start a new one after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, growing the pool first if it is full.  Yields 1 on success
   and 0 if poolGrow fails. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
200
/* The complete state of one parser.  An XML_Parser handle is cast to
   Parser * by the field accessor macros that follow this definition. */
typedef struct {
  /* The first member must be userData so that the XML_GetUserData macro works. */
  void *userData;
  /* first argument passed to the application's handlers */
  void *handlerArg;
  /* parser-owned input buffer (see XML_GetBuffer) */
  char *buffer;
  /* first character to be parsed */
  const char *bufferPtr;
  /* past last character to be parsed */
  char *bufferEnd;
  /* allocated end of buffer */
  const char *bufferLim;
  /* running total of input bytes handed to XML_Parse / XML_ParseBuffer */
  long parseEndByteIndex;
  /* end of the input range most recently given to the processor */
  const char *parseEndPtr;
  XML_Char *dataBuf;
  XML_Char *dataBufEnd;
  /* application callbacks; 0 when not set */
  XML_StartElementHandler startElementHandler;
  XML_EndElementHandler endElementHandler;
  XML_CharacterDataHandler characterDataHandler;
  XML_ProcessingInstructionHandler processingInstructionHandler;
  XML_DefaultHandler defaultHandler;
  XML_UnparsedEntityDeclHandler unparsedEntityDeclHandler;
  XML_NotationDeclHandler notationDeclHandler;
  XML_ExternalEntityRefHandler externalEntityRefHandler;
  XML_UnknownEncodingHandler unknownEncodingHandler;
  const ENCODING *encoding;
  INIT_ENCODING initEncoding;
  /* copy of the encoding name given to XML_ParserCreate, or 0 */
  const XML_Char *protocolEncodingName;
  /* freed with free() in XML_ParserFree */
  void *unknownEncodingMem;
  /* passed to unknownEncodingRelease in XML_ParserFree */
  void *unknownEncodingData;
  /* data pointer registered with XML_SetUnknownEncodingHandler */
  void *unknownEncodingHandlerData;
  void (*unknownEncodingRelease)(void *);
  PROLOG_STATE prologState;
  /* function that handles the next piece of input */
  Processor *processor;
  enum XML_Error errorCode;
  /* start and end of the current event in the input */
  const char *eventPtr;
  const char *eventEndPtr;
  /* input position that the position field corresponds to */
  const char *positionPtr;
  /* element nesting depth */
  int tagLevel;
  ENTITY *declEntity;
  const XML_Char *declNotationName;
  const XML_Char *declNotationPublicId;
  ELEMENT_TYPE *declElementType;
  ATTRIBUTE_ID *declAttributeId;
  char declAttributeIsCdata;
  DTD dtd;
  /* stack of open elements and list of TAGs available for reuse */
  TAG *tagStack;
  TAG *freeTagList;
  /* number of ATTRIBUTE slots allocated in atts */
  int attsSize;
  ATTRIBUTE *atts;
  POSITION position;
  STRING_POOL tempPool;
  STRING_POOL temp2Pool;
  char *groupConnector;
  unsigned groupSize;
  int hadExternalDoctype;
} Parser;
257
/* Field accessors.  Each macro expands to the like-named field of the
   Parser behind a variable called parser, so they can only be used in a
   function that has an XML_Parser named parser in scope.  Because these
   are plain identifiers, the same names cannot be used for locals or
   parameters in such functions. */
#define userData (((Parser *)parser)->userData)
#define handlerArg (((Parser *)parser)->handlerArg)
#define startElementHandler (((Parser *)parser)->startElementHandler)
#define endElementHandler (((Parser *)parser)->endElementHandler)
#define characterDataHandler (((Parser *)parser)->characterDataHandler)
#define processingInstructionHandler (((Parser *)parser)->processingInstructionHandler)
#define defaultHandler (((Parser *)parser)->defaultHandler)
#define unparsedEntityDeclHandler (((Parser *)parser)->unparsedEntityDeclHandler)
#define notationDeclHandler (((Parser *)parser)->notationDeclHandler)
#define externalEntityRefHandler (((Parser *)parser)->externalEntityRefHandler)
#define unknownEncodingHandler (((Parser *)parser)->unknownEncodingHandler)
#define encoding (((Parser *)parser)->encoding)
#define initEncoding (((Parser *)parser)->initEncoding)
#define unknownEncodingMem (((Parser *)parser)->unknownEncodingMem)
#define unknownEncodingData (((Parser *)parser)->unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->protocolEncodingName)
#define prologState (((Parser *)parser)->prologState)
#define processor (((Parser *)parser)->processor)
#define errorCode (((Parser *)parser)->errorCode)
#define eventPtr (((Parser *)parser)->eventPtr)
#define eventEndPtr (((Parser *)parser)->eventEndPtr)
#define positionPtr (((Parser *)parser)->positionPtr)
#define position (((Parser *)parser)->position)
#define tagLevel (((Parser *)parser)->tagLevel)
#define buffer (((Parser *)parser)->buffer)
#define bufferPtr (((Parser *)parser)->bufferPtr)
#define bufferEnd (((Parser *)parser)->bufferEnd)
#define parseEndByteIndex (((Parser *)parser)->parseEndByteIndex)
#define parseEndPtr (((Parser *)parser)->parseEndPtr)
#define bufferLim (((Parser *)parser)->bufferLim)
#define dataBuf (((Parser *)parser)->dataBuf)
#define dataBufEnd (((Parser *)parser)->dataBufEnd)
#define dtd (((Parser *)parser)->dtd)
#define declEntity (((Parser *)parser)->declEntity)
#define declNotationName (((Parser *)parser)->declNotationName)
#define declNotationPublicId (((Parser *)parser)->declNotationPublicId)
#define declElementType (((Parser *)parser)->declElementType)
#define declAttributeId (((Parser *)parser)->declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->declAttributeIsCdata)
#define freeTagList (((Parser *)parser)->freeTagList)
#define tagStack (((Parser *)parser)->tagStack)
#define atts (((Parser *)parser)->atts)
#define attsSize (((Parser *)parser)->attsSize)
#define tempPool (((Parser *)parser)->tempPool)
#define temp2Pool (((Parser *)parser)->temp2Pool)
#define groupConnector (((Parser *)parser)->groupConnector)
#define groupSize (((Parser *)parser)->groupSize)
#define hadExternalDoctype (((Parser *)parser)->hadExternalDoctype)
309
310XML_Parser XML_ParserCreate(const XML_Char *encodingName)
311{
312  XML_Parser parser = malloc(sizeof(Parser));
313  if (!parser)
314    return parser;
315  processor = prologInitProcessor;
316  XmlPrologStateInit(&prologState);
317  userData = 0;
318  handlerArg = 0;
319  startElementHandler = 0;
320  endElementHandler = 0;
321  characterDataHandler = 0;
322  processingInstructionHandler = 0;
323  defaultHandler = 0;
324  unparsedEntityDeclHandler = 0;
325  notationDeclHandler = 0;
326  externalEntityRefHandler = 0;
327  unknownEncodingHandler = 0;
328  buffer = 0;
329  bufferPtr = 0;
330  bufferEnd = 0;
331  parseEndByteIndex = 0;
332  parseEndPtr = 0;
333  bufferLim = 0;
334  declElementType = 0;
335  declAttributeId = 0;
336  declEntity = 0;
337  declNotationName = 0;
338  declNotationPublicId = 0;
339  memset(&position, 0, sizeof(POSITION));
340  errorCode = XML_ERROR_NONE;
341  eventPtr = 0;
342  eventEndPtr = 0;
343  positionPtr = 0;
344  tagLevel = 0;
345  tagStack = 0;
346  freeTagList = 0;
347  attsSize = INIT_ATTS_SIZE;
348  atts = malloc(attsSize * sizeof(ATTRIBUTE));
349  dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
350  groupSize = 0;
351  groupConnector = 0;
352  hadExternalDoctype = 0;
353  unknownEncodingMem = 0;
354  unknownEncodingRelease = 0;
355  unknownEncodingData = 0;
356  unknownEncodingHandlerData = 0;
357  poolInit(&tempPool);
358  poolInit(&temp2Pool);
359  protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
360  if (!dtdInit(&dtd) || !atts || !dataBuf
361      || (encodingName && !protocolEncodingName)) {
362    XML_ParserFree(parser);
363    return 0;
364  }
365  dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
366  XmlInitEncoding(&initEncoding, &encoding, 0);
367  return parser;
368}
369
370XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
371					  const XML_Char *openEntityNames,
372					  const XML_Char *encodingName)
373{
374  XML_Parser parser = oldParser;
375  DTD *oldDtd = &dtd;
376  XML_StartElementHandler oldStartElementHandler = startElementHandler;
377  XML_EndElementHandler oldEndElementHandler = endElementHandler;
378  XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
379  XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
380  XML_DefaultHandler oldDefaultHandler = defaultHandler;
381  XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
382  XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
383  void *oldUserData = userData;
384  void *oldHandlerArg = handlerArg;
385
386  parser = XML_ParserCreate(encodingName);
387  if (!parser)
388    return 0;
389  startElementHandler = oldStartElementHandler;
390  endElementHandler = oldEndElementHandler;
391  characterDataHandler = oldCharacterDataHandler;
392  processingInstructionHandler = oldProcessingInstructionHandler;
393  defaultHandler = oldDefaultHandler;
394  externalEntityRefHandler = oldExternalEntityRefHandler;
395  unknownEncodingHandler = oldUnknownEncodingHandler;
396  userData = oldUserData;
397  if (oldUserData == oldHandlerArg)
398    handlerArg = userData;
399  else
400    handlerArg = parser;
401  if (!dtdCopy(&dtd, oldDtd) || !setOpenEntityNames(parser, openEntityNames)) {
402    XML_ParserFree(parser);
403    return 0;
404  }
405  processor = externalEntityInitProcessor;
406  return parser;
407}
408
409void XML_ParserFree(XML_Parser parser)
410{
411  for (;;) {
412    TAG *p;
413    if (tagStack == 0) {
414      if (freeTagList == 0)
415	break;
416      tagStack = freeTagList;
417      freeTagList = 0;
418    }
419    p = tagStack;
420    tagStack = tagStack->parent;
421    free(p->buf);
422    free(p);
423  }
424  poolDestroy(&tempPool);
425  poolDestroy(&temp2Pool);
426  dtdDestroy(&dtd);
427  free((void *)atts);
428  free(groupConnector);
429  free(buffer);
430  free(dataBuf);
431  free(unknownEncodingMem);
432  if (unknownEncodingRelease)
433    unknownEncodingRelease(unknownEncodingData);
434  free(parser);
435}
436
/* Make handlers receive the parser itself as their first argument
   (handlerArg) instead of the user data. */
void XML_UseParserAsHandlerArg(XML_Parser parser)
{
  handlerArg = parser;
}
441
442void XML_SetUserData(XML_Parser parser, void *p)
443{
444  if (handlerArg == userData)
445    handlerArg = userData = p;
446  else
447    userData = p;
448}
449
450int XML_SetBase(XML_Parser parser, const XML_Char *p)
451{
452  if (p) {
453    p = poolCopyString(&dtd.pool, p);
454    if (!p)
455      return 0;
456    dtd.base = p;
457  }
458  else
459    dtd.base = 0;
460  return 1;
461}
462
/* Return the base stored by XML_SetBase, or 0 if none is set. */
const XML_Char *XML_GetBase(XML_Parser parser)
{
  return dtd.base;
}
467
/* Register the start-element and end-element handlers.  Passing 0 for
   either one removes that handler. */
void XML_SetElementHandler(XML_Parser parser,
			   XML_StartElementHandler start,
			   XML_EndElementHandler end)
{
  startElementHandler = start;
  endElementHandler = end;
}
475
/* Register the character data handler (0 removes it). */
void XML_SetCharacterDataHandler(XML_Parser parser,
				 XML_CharacterDataHandler handler)
{
  characterDataHandler = handler;
}
481
/* Register the processing instruction handler (0 removes it). */
void XML_SetProcessingInstructionHandler(XML_Parser parser,
					 XML_ProcessingInstructionHandler handler)
{
  processingInstructionHandler = handler;
}
487
/* Register the default handler (0 removes it).  In this file the default
   handler receives input that no more specific handler consumed, via
   reportDefault. */
void XML_SetDefaultHandler(XML_Parser parser,
			   XML_DefaultHandler handler)
{
  defaultHandler = handler;
}
493
/* Register the unparsed entity declaration handler (0 removes it). */
void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
				      XML_UnparsedEntityDeclHandler handler)
{
  unparsedEntityDeclHandler = handler;
}
499
/* Register the notation declaration handler (0 removes it). */
void XML_SetNotationDeclHandler(XML_Parser parser,
				XML_NotationDeclHandler handler)
{
  notationDeclHandler = handler;
}
505
/* Register the external entity reference handler (0 removes it).  The
   handler is called for references to entities that have no internal
   replacement text. */
void XML_SetExternalEntityRefHandler(XML_Parser parser,
				     XML_ExternalEntityRefHandler handler)
{
  externalEntityRefHandler = handler;
}
511
/* Register the unknown-encoding handler together with the data pointer
   stored alongside it in unknownEncodingHandlerData. */
void XML_SetUnknownEncodingHandler(XML_Parser parser,
				   XML_UnknownEncodingHandler handler,
				   void *data)
{
  unknownEncodingHandler = handler;
  unknownEncodingHandlerData = data;
}
519
520int XML_Parse(XML_Parser parser, const char *s, size_t len, int isFinal)
521{
522  if (len == 0) {
523    if (!isFinal)
524      return 1;
525    errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
526    if (errorCode == XML_ERROR_NONE)
527      return 1;
528    eventEndPtr = eventPtr;
529    return 0;
530  }
531  else if (bufferPtr == bufferEnd) {
532    const char *end;
533    size_t nLeftOver;
534    parseEndByteIndex += len;
535    positionPtr = s;
536    if (isFinal) {
537      errorCode = processor(parser, s, parseEndPtr = s + len, 0);
538      if (errorCode == XML_ERROR_NONE)
539	return 1;
540      eventEndPtr = eventPtr;
541      return 0;
542    }
543    errorCode = processor(parser, s, parseEndPtr = s + len, &end);
544    if (errorCode != XML_ERROR_NONE) {
545      eventEndPtr = eventPtr;
546      return 0;
547    }
548    XmlUpdatePosition(encoding, positionPtr, end, &position);
549    nLeftOver = s + len - end;
550    if (nLeftOver) {
551      if (buffer == 0 || nLeftOver > bufferLim - buffer) {
552	/* FIXME avoid integer overflow */
553	buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
554	if (!buffer) {
555	  errorCode = XML_ERROR_NO_MEMORY;
556	  eventPtr = eventEndPtr = 0;
557	  return 0;
558	}
559	bufferLim = buffer + len * 2;
560      }
561      memcpy(buffer, end, nLeftOver);
562      bufferPtr = buffer;
563      bufferEnd = buffer + nLeftOver;
564    }
565    return 1;
566  }
567  else {
568    memcpy(XML_GetBuffer(parser, len), s, len);
569    return XML_ParseBuffer(parser, len, isFinal);
570  }
571}
572
573int XML_ParseBuffer(XML_Parser parser, size_t len, int isFinal)
574{
575  const char *start = bufferPtr;
576  positionPtr = start;
577  bufferEnd += len;
578  parseEndByteIndex += len;
579  errorCode = processor(parser, start, parseEndPtr = bufferEnd,
580			isFinal ? (const char **)0 : &bufferPtr);
581  if (errorCode == XML_ERROR_NONE) {
582    if (!isFinal)
583      XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
584    return 1;
585  }
586  else {
587    eventEndPtr = eventPtr;
588    return 0;
589  }
590}
591
592void *XML_GetBuffer(XML_Parser parser, size_t len)
593{
594  if (len > bufferLim - bufferEnd) {
595    /* FIXME avoid integer overflow */
596    size_t neededSize = len + (bufferEnd - bufferPtr);
597    if (neededSize  <= bufferLim - buffer) {
598      memmove(buffer, bufferPtr, (size_t)(bufferEnd - bufferPtr));
599      bufferEnd = buffer + (bufferEnd - bufferPtr);
600      bufferPtr = buffer;
601    }
602    else {
603      char *newBuf;
604      size_t bufferSize = bufferLim - bufferPtr;
605      if (bufferSize == 0)
606	bufferSize = INIT_BUFFER_SIZE;
607      do {
608	bufferSize *= 2;
609      } while (bufferSize < neededSize);
610      newBuf = malloc(bufferSize);
611      if (newBuf == 0) {
612	errorCode = XML_ERROR_NO_MEMORY;
613	return 0;
614      }
615      bufferLim = newBuf + bufferSize;
616      if (bufferPtr) {
617	memcpy(newBuf, bufferPtr, (size_t)(bufferEnd - bufferPtr));
618	free(buffer);
619      }
620      bufferEnd = newBuf + (bufferEnd - bufferPtr);
621      bufferPtr = buffer = newBuf;
622    }
623  }
624  return bufferEnd;
625}
626
/* Return the error code recorded by the most recent parsing call. */
enum XML_Error XML_GetErrorCode(XML_Parser parser)
{
  return errorCode;
}
631
632long XML_GetCurrentByteIndex(XML_Parser parser)
633{
634  if (eventPtr)
635    return parseEndByteIndex - (parseEndPtr - eventPtr);
636  return -1;
637}
638
/* Return the line number of the current event.  The stored line number
   is zero-based, so 1 is added.  When there is a current event, the
   stored position is first advanced from positionPtr up to the event. */
int XML_GetCurrentLineNumber(XML_Parser parser)
{
  if (eventPtr) {
    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
    /* remember how far the position has been brought up to date */
    positionPtr = eventPtr;
  }
  return position.lineNumber + 1;
}
647
/* Return the column number of the current event, exactly as stored in
   position (no offset is added).  When there is a current event, the
   stored position is first advanced from positionPtr up to the event. */
int XML_GetCurrentColumnNumber(XML_Parser parser)
{
  if (eventPtr) {
    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
    /* remember how far the position has been brought up to date */
    positionPtr = eventPtr;
  }
  return position.columnNumber;
}
656
/* Pass the input of the current event (eventPtr .. eventEndPtr) to the
   default handler.  Does nothing when no default handler is set. */
void XML_DefaultCurrent(XML_Parser parser)
{
  if (defaultHandler)
    reportDefault(parser, encoding, eventPtr, eventEndPtr);
}
662
663const XML_LChar *XML_ErrorString(enum XML_Error code)
664{
665  static const XML_LChar *message[] = {
666    0,
667    XML_T("out of memory"),
668    XML_T("syntax error"),
669    XML_T("no element found"),
670    XML_T("not well-formed"),
671    XML_T("unclosed token"),
672    XML_T("unclosed token"),
673    XML_T("mismatched tag"),
674    XML_T("duplicate attribute"),
675    XML_T("junk after document element"),
676    XML_T("illegal parameter entity reference"),
677    XML_T("undefined entity"),
678    XML_T("recursive entity reference"),
679    XML_T("asynchronous entity"),
680    XML_T("reference to invalid character number"),
681    XML_T("reference to binary entity"),
682    XML_T("reference to external entity in attribute"),
683    XML_T("xml processing instruction not at start of external entity"),
684    XML_T("unknown encoding"),
685    XML_T("encoding specified in XML declaration is incorrect"),
686    XML_T("unclosed CDATA section"),
687    XML_T("error in processing external entity reference")
688  };
689  if (code > 0 && code < sizeof(message)/sizeof(message[0]))
690    return message[code];
691  return 0;
692}
693
/* Processor for document content: runs doContent with a start tag level
   of 0 and the parser's own encoding. */
static
enum XML_Error contentProcessor(XML_Parser parser,
				const char *start,
				const char *end,
				const char **endPtr)
{
  return doContent(parser, 0, encoding, start, end, endPtr);
}
702
703static
704enum XML_Error externalEntityInitProcessor(XML_Parser parser,
705					   const char *start,
706					   const char *end,
707					   const char **endPtr)
708{
709  enum XML_Error result = initializeEncoding(parser);
710  if (result != XML_ERROR_NONE)
711    return result;
712  processor = externalEntityInitProcessor2;
713  return externalEntityInitProcessor2(parser, start, end, endPtr);
714}
715
716static
717enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
718					    const char *start,
719					    const char *end,
720					    const char **endPtr)
721{
722  const char *next;
723  int tok = XmlContentTok(encoding, start, end, &next);
724  switch (tok) {
725  case XML_TOK_BOM:
726    start = next;
727    break;
728  case XML_TOK_PARTIAL:
729    if (endPtr) {
730      *endPtr = start;
731      return XML_ERROR_NONE;
732    }
733    eventPtr = start;
734    return XML_ERROR_UNCLOSED_TOKEN;
735  case XML_TOK_PARTIAL_CHAR:
736    if (endPtr) {
737      *endPtr = start;
738      return XML_ERROR_NONE;
739    }
740    eventPtr = start;
741    return XML_ERROR_PARTIAL_CHAR;
742  }
743  processor = externalEntityInitProcessor3;
744  return externalEntityInitProcessor3(parser, start, end, endPtr);
745}
746
747static
748enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
749					    const char *start,
750					    const char *end,
751					    const char **endPtr)
752{
753  const char *next;
754  int tok = XmlContentTok(encoding, start, end, &next);
755  switch (tok) {
756  case XML_TOK_XML_DECL:
757    {
758      enum XML_Error result = processXmlDecl(parser, 1, start, next);
759      if (result != XML_ERROR_NONE)
760	return result;
761      start = next;
762    }
763    break;
764  case XML_TOK_PARTIAL:
765    if (endPtr) {
766      *endPtr = start;
767      return XML_ERROR_NONE;
768    }
769    eventPtr = start;
770    return XML_ERROR_UNCLOSED_TOKEN;
771  case XML_TOK_PARTIAL_CHAR:
772    if (endPtr) {
773      *endPtr = start;
774      return XML_ERROR_NONE;
775    }
776    eventPtr = start;
777    return XML_ERROR_PARTIAL_CHAR;
778  }
779  processor = externalEntityContentProcessor;
780  tagLevel = 1;
781  return doContent(parser, 1, encoding, start, end, endPtr);
782}
783
/* Processor for the content of an external entity: runs doContent with
   a start tag level of 1, the level externalEntityInitProcessor3 sets
   before switching to this processor. */
static
enum XML_Error externalEntityContentProcessor(XML_Parser parser,
					      const char *start,
					      const char *end,
					      const char **endPtr)
{
  return doContent(parser, 1, encoding, start, end, endPtr);
}
792
793static enum XML_Error
794doContent(XML_Parser parser,
795	  int startTagLevel,
796	  const ENCODING *enc,
797	  const char *s,
798	  const char *end,
799	  const char **nextPtr)
800{
801  const ENCODING *internalEnc = XmlGetInternalEncoding();
802  const char *dummy;
803  const char **eventPP;
804  const char **eventEndPP;
805  if (enc == encoding) {
806    eventPP = &eventPtr;
807    *eventPP = s;
808    eventEndPP = &eventEndPtr;
809  }
810  else
811    eventPP = eventEndPP = &dummy;
812  for (;;) {
813    const char *next;
814    int tok = XmlContentTok(enc, s, end, &next);
815    *eventEndPP = next;
816    switch (tok) {
817    case XML_TOK_TRAILING_CR:
818      if (nextPtr) {
819	*nextPtr = s;
820	return XML_ERROR_NONE;
821      }
822      *eventEndPP = end;
823      if (characterDataHandler) {
824	XML_Char c = XML_T('\n');
825	characterDataHandler(handlerArg, &c, 1);
826      }
827      else if (defaultHandler)
828	reportDefault(parser, enc, s, end);
829      if (startTagLevel == 0)
830	return XML_ERROR_NO_ELEMENTS;
831      if (tagLevel != startTagLevel)
832	return XML_ERROR_ASYNC_ENTITY;
833      return XML_ERROR_NONE;
834    case XML_TOK_NONE:
835      if (nextPtr) {
836	*nextPtr = s;
837	return XML_ERROR_NONE;
838      }
839      if (startTagLevel > 0) {
840	if (tagLevel != startTagLevel)
841	  return XML_ERROR_ASYNC_ENTITY;
842	return XML_ERROR_NONE;
843      }
844      return XML_ERROR_NO_ELEMENTS;
845    case XML_TOK_INVALID:
846      *eventPP = next;
847      return XML_ERROR_INVALID_TOKEN;
848    case XML_TOK_PARTIAL:
849      if (nextPtr) {
850	*nextPtr = s;
851	return XML_ERROR_NONE;
852      }
853      return XML_ERROR_UNCLOSED_TOKEN;
854    case XML_TOK_PARTIAL_CHAR:
855      if (nextPtr) {
856	*nextPtr = s;
857	return XML_ERROR_NONE;
858      }
859      return XML_ERROR_PARTIAL_CHAR;
860    case XML_TOK_ENTITY_REF:
861      {
862	const XML_Char *name;
863	ENTITY *entity;
864	XML_Char ch = XmlPredefinedEntityName(enc,
865					      s + enc->minBytesPerChar,
866					      next - enc->minBytesPerChar);
867	if (ch) {
868	  if (characterDataHandler)
869	    characterDataHandler(handlerArg, &ch, 1);
870	  else if (defaultHandler)
871	    reportDefault(parser, enc, s, next);
872	  break;
873	}
874	name = poolStoreString(&dtd.pool, enc,
875				s + enc->minBytesPerChar,
876				next - enc->minBytesPerChar);
877	if (!name)
878	  return XML_ERROR_NO_MEMORY;
879	entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
880	poolDiscard(&dtd.pool);
881	if (!entity) {
882	  if (dtd.complete || dtd.standalone)
883	    return XML_ERROR_UNDEFINED_ENTITY;
884	  if (defaultHandler)
885	    reportDefault(parser, enc, s, next);
886	  break;
887	}
888	if (entity->open)
889	  return XML_ERROR_RECURSIVE_ENTITY_REF;
890	if (entity->notation)
891	  return XML_ERROR_BINARY_ENTITY_REF;
892	if (entity) {
893	  if (entity->textPtr) {
894	    enum XML_Error result;
895	    if (defaultHandler) {
896	      reportDefault(parser, enc, s, next);
897	      break;
898	    }
899	    /* Protect against the possibility that somebody sets
900	       the defaultHandler from inside another handler. */
901	    *eventEndPP = *eventPP;
902	    entity->open = 1;
903	    result = doContent(parser,
904			       tagLevel,
905			       internalEnc,
906			       (char *)entity->textPtr,
907			       (char *)(entity->textPtr + entity->textLen),
908			       0);
909	    entity->open = 0;
910	    if (result)
911	      return result;
912	  }
913	  else if (externalEntityRefHandler) {
914	    const XML_Char *openEntityNames;
915	    entity->open = 1;
916	    openEntityNames = getOpenEntityNames(parser);
917	    entity->open = 0;
918	    if (!openEntityNames)
919	      return XML_ERROR_NO_MEMORY;
920	    if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId))
921	      return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
922	  }
923	  else if (defaultHandler)
924	    reportDefault(parser, enc, s, next);
925	}
926	break;
927      }
928    case XML_TOK_START_TAG_WITH_ATTS:
929      if (!startElementHandler) {
930	enum XML_Error result = storeAtts(parser, enc, 0, s);
931	if (result)
932	  return result;
933      }
934      /* fall through */
935    case XML_TOK_START_TAG_NO_ATTS:
936      {
937	TAG *tag;
938	if (freeTagList) {
939	  tag = freeTagList;
940	  freeTagList = freeTagList->parent;
941	}
942	else {
943	  tag = malloc(sizeof(TAG));
944	  if (!tag)
945	    return XML_ERROR_NO_MEMORY;
946	  tag->buf = malloc(INIT_TAG_BUF_SIZE);
947	  if (!tag->buf)
948	    return XML_ERROR_NO_MEMORY;
949	  tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
950	}
951	tag->parent = tagStack;
952	tagStack = tag;
953	tag->rawName = s + enc->minBytesPerChar;
954	tag->rawNameLength = XmlNameLength(enc, tag->rawName);
955	if (nextPtr) {
956	  if (tag->rawNameLength > tag->bufEnd - tag->buf) {
957	    size_t bufSize = tag->rawNameLength * 4;
958	    bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
959	    tag->buf = realloc(tag->buf, bufSize);
960	    if (!tag->buf)
961	      return XML_ERROR_NO_MEMORY;
962	    tag->bufEnd = tag->buf + bufSize;
963	  }
964	  memcpy(tag->buf, tag->rawName, tag->rawNameLength);
965	  tag->rawName = tag->buf;
966	}
967	++tagLevel;
968	if (startElementHandler) {
969	  enum XML_Error result;
970	  XML_Char *toPtr;
971	  for (;;) {
972	    const char *rawNameEnd = tag->rawName + tag->rawNameLength;
973	    const char *fromPtr = tag->rawName;
974	    size_t bufSize;
975	    if (nextPtr)
976	      toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
977	    else
978	      toPtr = (XML_Char *)tag->buf;
979	    tag->name = toPtr;
980	    XmlConvert(enc,
981		       &fromPtr, rawNameEnd,
982		       (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
983	    if (fromPtr == rawNameEnd)
984	      break;
985	    bufSize = (tag->bufEnd - tag->buf) << 1;
986	    tag->buf = realloc(tag->buf, bufSize);
987	    if (!tag->buf)
988	      return XML_ERROR_NO_MEMORY;
989	    tag->bufEnd = tag->buf + bufSize;
990	    if (nextPtr)
991	      tag->rawName = tag->buf;
992	  }
993	  *toPtr = XML_T('\0');
994	  result = storeAtts(parser, enc, tag->name, s);
995	  if (result)
996	    return result;
997	  startElementHandler(handlerArg, tag->name, (const XML_Char **)atts);
998	  poolClear(&tempPool);
999	}
1000	else {
1001	  tag->name = 0;
1002	  if (defaultHandler)
1003	    reportDefault(parser, enc, s, next);
1004	}
1005	break;
1006      }
1007    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1008      if (!startElementHandler) {
1009	enum XML_Error result = storeAtts(parser, enc, 0, s);
1010	if (result)
1011	  return result;
1012      }
1013      /* fall through */
1014    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1015      if (startElementHandler || endElementHandler) {
1016	const char *rawName = s + enc->minBytesPerChar;
1017	const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
1018					       rawName
1019					       + XmlNameLength(enc, rawName));
1020	if (!name)
1021	  return XML_ERROR_NO_MEMORY;
1022	poolFinish(&tempPool);
1023	if (startElementHandler) {
1024	  enum XML_Error result = storeAtts(parser, enc, name, s);
1025	  if (result)
1026	    return result;
1027	  startElementHandler(handlerArg, name, (const XML_Char **)atts);
1028	}
1029	if (endElementHandler) {
1030	  if (startElementHandler)
1031	    *eventEndPP = *eventPP;
1032	  endElementHandler(handlerArg, name);
1033	}
1034	poolClear(&tempPool);
1035      }
1036      else if (defaultHandler)
1037	reportDefault(parser, enc, s, next);
1038      if (tagLevel == 0)
1039	return epilogProcessor(parser, next, end, nextPtr);
1040      break;
1041    case XML_TOK_END_TAG:
1042      if (tagLevel == startTagLevel)
1043        return XML_ERROR_ASYNC_ENTITY;
1044      else {
1045	size_t len;
1046	const char *rawName;
1047	TAG *tag = tagStack;
1048	tagStack = tag->parent;
1049	tag->parent = freeTagList;
1050	freeTagList = tag;
1051	rawName = s + enc->minBytesPerChar*2;
1052	len = XmlNameLength(enc, rawName);
1053	if (len != tag->rawNameLength
1054	    || memcmp(tag->rawName, rawName, len) != 0) {
1055	  *eventPP = rawName;
1056	  return XML_ERROR_TAG_MISMATCH;
1057	}
1058	--tagLevel;
1059	if (endElementHandler) {
1060	  if (tag->name)
1061	    endElementHandler(handlerArg, tag->name);
1062	  else {
1063	    const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
1064	                                           rawName + len);
1065	    if (!name)
1066	      return XML_ERROR_NO_MEMORY;
1067	    endElementHandler(handlerArg, name);
1068	    poolClear(&tempPool);
1069	  }
1070	}
1071	else if (defaultHandler)
1072	  reportDefault(parser, enc, s, next);
1073	if (tagLevel == 0)
1074	  return epilogProcessor(parser, next, end, nextPtr);
1075      }
1076      break;
1077    case XML_TOK_CHAR_REF:
1078      {
1079	int n = XmlCharRefNumber(enc, s);
1080	if (n < 0)
1081	  return XML_ERROR_BAD_CHAR_REF;
1082	if (characterDataHandler) {
1083	  XML_Char buf[XML_ENCODE_MAX];
1084	  characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1085	}
1086	else if (defaultHandler)
1087	  reportDefault(parser, enc, s, next);
1088      }
1089      break;
1090    case XML_TOK_XML_DECL:
1091      return XML_ERROR_MISPLACED_XML_PI;
1092    case XML_TOK_DATA_NEWLINE:
1093      if (characterDataHandler) {
1094	XML_Char c = XML_T('\n');
1095	characterDataHandler(handlerArg, &c, 1);
1096      }
1097      else if (defaultHandler)
1098	reportDefault(parser, enc, s, next);
1099      break;
1100    case XML_TOK_CDATA_SECT_OPEN:
1101      {
1102	enum XML_Error result;
1103	if (characterDataHandler)
1104  	  characterDataHandler(handlerArg, dataBuf, 0);
1105	else if (defaultHandler)
1106	  reportDefault(parser, enc, s, next);
1107	result = doCdataSection(parser, enc, &next, end, nextPtr);
1108	if (!next) {
1109	  processor = cdataSectionProcessor;
1110	  return result;
1111	}
1112      }
1113      break;
1114    case XML_TOK_TRAILING_RSQB:
1115      if (nextPtr) {
1116	*nextPtr = s;
1117	return XML_ERROR_NONE;
1118      }
1119      if (characterDataHandler) {
1120	if (MUST_CONVERT(enc, s)) {
1121	  ICHAR *dataPtr = (ICHAR *)dataBuf;
1122	  XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1123	  characterDataHandler(handlerArg, dataBuf,
1124			       (size_t)(dataPtr - (ICHAR *)dataBuf));
1125	}
1126	else
1127	  characterDataHandler(handlerArg,
1128		  	       (XML_Char *)s,
1129			       (size_t)((XML_Char *)end - (XML_Char *)s));
1130      }
1131      else if (defaultHandler)
1132	reportDefault(parser, enc, s, end);
1133      if (startTagLevel == 0) {
1134        *eventPP = end;
1135	return XML_ERROR_NO_ELEMENTS;
1136      }
1137      if (tagLevel != startTagLevel) {
1138	*eventPP = end;
1139	return XML_ERROR_ASYNC_ENTITY;
1140      }
1141      return XML_ERROR_NONE;
1142    case XML_TOK_DATA_CHARS:
1143      if (characterDataHandler) {
1144	if (MUST_CONVERT(enc, s)) {
1145	  for (;;) {
1146	    ICHAR *dataPtr = (ICHAR *)dataBuf;
1147	    XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1148	    *eventEndPP = s;
1149	    characterDataHandler(handlerArg, dataBuf, (size_t)(dataPtr - (ICHAR *)dataBuf));
1150	    if (s == next)
1151	      break;
1152	    *eventPP = s;
1153	  }
1154	}
1155	else
1156	  characterDataHandler(handlerArg,
1157			       (XML_Char *)s,
1158			       (size_t)((XML_Char *)next - (XML_Char *)s));
1159      }
1160      else if (defaultHandler)
1161	reportDefault(parser, enc, s, next);
1162      break;
1163    case XML_TOK_PI:
1164      if (!reportProcessingInstruction(parser, enc, s, next))
1165	return XML_ERROR_NO_MEMORY;
1166      break;
1167    default:
1168      if (defaultHandler)
1169	reportDefault(parser, enc, s, next);
1170      break;
1171    }
1172    *eventPP = s = next;
1173  }
1174  /* not reached */
1175}
1176
1177/* If tagName is non-null, build a real list of attributes,
1178otherwise just check the attributes for well-formedness. */
1179
1180static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1181				const XML_Char *tagName, const char *s)
1182{
1183  ELEMENT_TYPE *elementType = 0;
1184  int nDefaultAtts = 0;
1185  const XML_Char **appAtts;
1186  int i;
1187  int n;
1188
1189  if (tagName) {
1190    elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagName, 0);
1191    if (elementType)
1192      nDefaultAtts = elementType->nDefaultAtts;
1193  }
1194
1195  n = XmlGetAttributes(enc, s, attsSize, atts);
1196  if (n + nDefaultAtts > attsSize) {
1197    int oldAttsSize = attsSize;
1198    attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1199    atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1200    if (!atts)
1201      return XML_ERROR_NO_MEMORY;
1202    if (n > oldAttsSize)
1203      XmlGetAttributes(enc, s, n, atts);
1204  }
1205  appAtts = (const XML_Char **)atts;
1206  for (i = 0; i < n; i++) {
1207    ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1208					  atts[i].name
1209					  + XmlNameLength(enc, atts[i].name));
1210    if (!attId)
1211      return XML_ERROR_NO_MEMORY;
1212    if ((attId->name)[-1]) {
1213      if (enc == encoding)
1214	eventPtr = atts[i].name;
1215      return XML_ERROR_DUPLICATE_ATTRIBUTE;
1216    }
1217    (attId->name)[-1] = 1;
1218    appAtts[i << 1] = attId->name;
1219    if (!atts[i].normalized) {
1220      enum XML_Error result;
1221      int isCdata = 1;
1222
1223      if (attId->maybeTokenized) {
1224	int j;
1225	for (j = 0; j < nDefaultAtts; j++) {
1226	  if (attId == elementType->defaultAtts[j].id) {
1227	    isCdata = elementType->defaultAtts[j].isCdata;
1228	    break;
1229	  }
1230	}
1231      }
1232
1233      result = storeAttributeValue(parser, enc, isCdata,
1234				   atts[i].valuePtr, atts[i].valueEnd,
1235			           &tempPool);
1236      if (result)
1237	return result;
1238      if (tagName) {
1239	appAtts[(i << 1) + 1] = poolStart(&tempPool);
1240	poolFinish(&tempPool);
1241      }
1242      else
1243	poolDiscard(&tempPool);
1244    }
1245    else if (tagName) {
1246      appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1247      if (appAtts[(i << 1) + 1] == 0)
1248	return XML_ERROR_NO_MEMORY;
1249      poolFinish(&tempPool);
1250    }
1251  }
1252  if (tagName) {
1253    int j;
1254    for (j = 0; j < nDefaultAtts; j++) {
1255      const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1256      if (!(da->id->name)[-1] && da->value) {
1257	(da->id->name)[-1] = 1;
1258	appAtts[i << 1] = da->id->name;
1259	appAtts[(i << 1) + 1] = da->value;
1260	i++;
1261      }
1262    }
1263    appAtts[i << 1] = 0;
1264  }
1265  while (i-- > 0)
1266    ((XML_Char *)appAtts[i << 1])[-1] = 0;
1267  return XML_ERROR_NONE;
1268}
1269
1270/* The idea here is to avoid using stack for each CDATA section when
1271the whole file is parsed with one call. */
1272
1273static
1274enum XML_Error cdataSectionProcessor(XML_Parser parser,
1275				     const char *start,
1276			    	     const char *end,
1277				     const char **endPtr)
1278{
1279  enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1280  if (start) {
1281    processor = contentProcessor;
1282    return contentProcessor(parser, start, end, endPtr);
1283  }
1284  return result;
1285}
1286
/* startPtr gets set to non-null if the section is closed, and to null if
the section is not yet closed. */
1289
/* Scans the body of a CDATA section from *startPtr up to end, reporting
   its content through characterDataHandler (or defaultHandler when no
   character data handler is set).

   enc       encoding used to tokenize the input.
   startPtr  in: where scanning begins; out: position just after the
             section's closing token, or null if that token was not seen.
   end       end of the available input.
   nextPtr   if non-null, receives the position of an incomplete token and
             the call returns XML_ERROR_NONE instead of an error.

   Returns XML_ERROR_NONE, XML_ERROR_INVALID_TOKEN, XML_ERROR_PARTIAL_CHAR
   or XML_ERROR_UNCLOSED_CDATA_SECTION. */
static
enum XML_Error doCdataSection(XML_Parser parser,
			      const ENCODING *enc,
			      const char **startPtr,
			      const char *end,
			      const char **nextPtr)
{
  const char *s = *startPtr;
  const char *dummy;
  const char **eventPP;
  const char **eventEndPP;
  /* Event positions are only recorded in the parser when scanning with the
     parser's own encoding; otherwise they are written to a scratch local. */
  if (enc == encoding) {
    eventPP = &eventPtr;
    *eventPP = s;
    eventEndPP = &eventEndPtr;
  }
  else
    eventPP = eventEndPP = &dummy;
  /* Stays null unless the closing token is found below. */
  *startPtr = 0;
  for (;;) {
    const char *next;
    int tok = XmlCdataSectionTok(enc, s, end, &next);
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      /* The end of the section is reported as a zero-length data call. */
      if (characterDataHandler)
	characterDataHandler(handlerArg, dataBuf, 0);
      else if (defaultHandler)
	reportDefault(parser, enc, s, next);
      *startPtr = next;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* Whatever the input used for the line break, the handler gets '\n'. */
      if (characterDataHandler) {
	XML_Char c = XML_T('\n');
	characterDataHandler(handlerArg, &c, 1);
      }
      else if (defaultHandler)
	reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS:
      if (characterDataHandler) {
	if (MUST_CONVERT(enc, s)) {
	  /* Convert through dataBuf one piece at a time, calling the handler
	     for each piece, until the whole token has been consumed. */
	  for (;;) {
  	    ICHAR *dataPtr = (ICHAR *)dataBuf;
	    XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
	    *eventEndPP = next;
	    characterDataHandler(handlerArg, dataBuf, (size_t)(dataPtr - (ICHAR *)dataBuf));
	    if (s == next)
	      break;
	    *eventPP = s;
	  }
	}
	else
	  /* No conversion needed: pass the input bytes through directly. */
	  characterDataHandler(handlerArg,
		  	       (XML_Char *)s,
			       (size_t)((XML_Char *)next - (XML_Char *)s));
      }
      else if (defaultHandler)
	reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      /* With a non-null nextPtr the unconsumed position is handed back and
	 the call succeeds; otherwise the truncated character is an error. */
      if (nextPtr) {
	*nextPtr = s;
	return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      /* Input ran out before the section was closed. */
      if (nextPtr) {
	*nextPtr = s;
	return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      abort();
    }
    *eventPP = s = next;
  }
  /* not reached */
}
1373
1374static enum XML_Error
1375initializeEncoding(XML_Parser parser)
1376{
1377  const char *s;
1378#ifdef XML_UNICODE
1379  char encodingBuf[128];
1380  if (!protocolEncodingName)
1381    s = 0;
1382  else {
1383    int i;
1384    for (i = 0; protocolEncodingName[i]; i++) {
1385      if (i == sizeof(encodingBuf) - 1
1386	  || protocolEncodingName[i] >= 0x80
1387	  || protocolEncodingName[i] < 0) {
1388	encodingBuf[0] = '\0';
1389	break;
1390      }
1391      encodingBuf[i] = (char)protocolEncodingName[i];
1392    }
1393    encodingBuf[i] = '\0';
1394    s = encodingBuf;
1395  }
1396#else
1397  s = protocolEncodingName;
1398#endif
1399  if (XmlInitEncoding(&initEncoding, &encoding, s))
1400    return XML_ERROR_NONE;
1401  return handleUnknownEncoding(parser, protocolEncodingName);
1402}
1403
1404static enum XML_Error
1405processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
1406	       const char *s, const char *next)
1407{
1408  const char *encodingName = 0;
1409  const ENCODING *newEncoding = 0;
1410  const char *version;
1411  int standalone = -1;
1412  if (!XmlParseXmlDecl(isGeneralTextEntity,
1413		       encoding,
1414		       s,
1415		       next,
1416		       &eventPtr,
1417		       &version,
1418		       &encodingName,
1419		       &newEncoding,
1420		       &standalone))
1421    return XML_ERROR_SYNTAX;
1422  if (defaultHandler)
1423    reportDefault(parser, encoding, s, next);
1424  if (!protocolEncodingName) {
1425    if (newEncoding) {
1426      if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
1427	eventPtr = encodingName;
1428	return XML_ERROR_INCORRECT_ENCODING;
1429      }
1430      encoding = newEncoding;
1431    }
1432    else if (encodingName) {
1433      enum XML_Error result;
1434      const XML_Char *s = poolStoreString(&tempPool,
1435					  encoding,
1436					  encodingName,
1437					  encodingName
1438					  + XmlNameLength(encoding, encodingName));
1439      if (!s)
1440	return XML_ERROR_NO_MEMORY;
1441      result = handleUnknownEncoding(parser, s);
1442      poolDiscard(&tempPool);
1443      if (result == XML_ERROR_UNKNOWN_ENCODING)
1444	eventPtr = encodingName;
1445      return result;
1446    }
1447  }
1448  if (!isGeneralTextEntity && standalone == 1)
1449    dtd.standalone = 1;
1450  return XML_ERROR_NONE;
1451}
1452
1453static enum XML_Error
1454handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
1455{
1456  if (unknownEncodingHandler) {
1457    XML_Encoding info;
1458    int i;
1459    for (i = 0; i < 256; i++)
1460      info.map[i] = -1;
1461    info.convert = 0;
1462    info.data = 0;
1463    info.release = 0;
1464    if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
1465      ENCODING *enc;
1466      unknownEncodingMem = malloc((size_t)XmlSizeOfUnknownEncoding());
1467      if (!unknownEncodingMem) {
1468	if (info.release)
1469	  info.release(info.data);
1470	return XML_ERROR_NO_MEMORY;
1471      }
1472      enc = XmlInitUnknownEncoding(unknownEncodingMem,
1473				   info.map,
1474				   info.convert,
1475				   info.data);
1476      if (enc) {
1477	unknownEncodingData = info.data;
1478	unknownEncodingRelease = info.release;
1479	encoding = enc;
1480	return XML_ERROR_NONE;
1481      }
1482    }
1483    if (info.release)
1484      info.release(info.data);
1485  }
1486  return XML_ERROR_UNKNOWN_ENCODING;
1487}
1488
1489static enum XML_Error
1490prologInitProcessor(XML_Parser parser,
1491		    const char *s,
1492		    const char *end,
1493		    const char **nextPtr)
1494{
1495  enum XML_Error result = initializeEncoding(parser);
1496  if (result != XML_ERROR_NONE)
1497    return result;
1498  processor = prologProcessor;
1499  return prologProcessor(parser, s, end, nextPtr);
1500}
1501
1502static enum XML_Error
1503prologProcessor(XML_Parser parser,
1504		const char *s,
1505		const char *end,
1506		const char **nextPtr)
1507{
1508  for (;;) {
1509    const char *next;
1510    int tok = XmlPrologTok(encoding, s, end, &next);
1511    if (tok <= 0) {
1512      if (nextPtr != 0 && tok != XML_TOK_INVALID) {
1513	*nextPtr = s;
1514	return XML_ERROR_NONE;
1515      }
1516      switch (tok) {
1517      case XML_TOK_INVALID:
1518	eventPtr = next;
1519	return XML_ERROR_INVALID_TOKEN;
1520      case XML_TOK_NONE:
1521	return XML_ERROR_NO_ELEMENTS;
1522      case XML_TOK_PARTIAL:
1523	return XML_ERROR_UNCLOSED_TOKEN;
1524      case XML_TOK_PARTIAL_CHAR:
1525	return XML_ERROR_PARTIAL_CHAR;
1526      case XML_TOK_TRAILING_CR:
1527	eventPtr = s + encoding->minBytesPerChar;
1528	return XML_ERROR_NO_ELEMENTS;
1529      default:
1530	abort();
1531      }
1532    }
1533    switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
1534    case XML_ROLE_XML_DECL:
1535      {
1536	enum XML_Error result = processXmlDecl(parser, 0, s, next);
1537	if (result != XML_ERROR_NONE)
1538	  return result;
1539      }
1540      break;
1541    case XML_ROLE_DOCTYPE_SYSTEM_ID:
1542      hadExternalDoctype = 1;
1543      break;
1544    case XML_ROLE_DOCTYPE_PUBLIC_ID:
1545    case XML_ROLE_ENTITY_PUBLIC_ID:
1546      if (!XmlIsPublicId(encoding, s, next, &eventPtr))
1547	return XML_ERROR_SYNTAX;
1548      if (declEntity) {
1549	XML_Char *tem = poolStoreString(&dtd.pool,
1550	                                encoding,
1551					s + encoding->minBytesPerChar,
1552	  				next - encoding->minBytesPerChar);
1553	if (!tem)
1554	  return XML_ERROR_NO_MEMORY;
1555	normalizePublicId(tem);
1556	declEntity->publicId = tem;
1557	poolFinish(&dtd.pool);
1558      }
1559      break;
1560    case XML_ROLE_INSTANCE_START:
1561      processor = contentProcessor;
1562      if (hadExternalDoctype)
1563	dtd.complete = 0;
1564      return contentProcessor(parser, s, end, nextPtr);
1565    case XML_ROLE_ATTLIST_ELEMENT_NAME:
1566      {
1567	const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
1568	if (!name)
1569	  return XML_ERROR_NO_MEMORY;
1570	declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
1571	if (!declElementType)
1572	  return XML_ERROR_NO_MEMORY;
1573	if (declElementType->name != name)
1574	  poolDiscard(&dtd.pool);
1575	else
1576	  poolFinish(&dtd.pool);
1577	break;
1578      }
1579    case XML_ROLE_ATTRIBUTE_NAME:
1580      declAttributeId = getAttributeId(parser, encoding, s, next);
1581      if (!declAttributeId)
1582	return XML_ERROR_NO_MEMORY;
1583      declAttributeIsCdata = 0;
1584      break;
1585    case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
1586      declAttributeIsCdata = 1;
1587      break;
1588    case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
1589    case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
1590      if (dtd.complete
1591	  && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
1592	return XML_ERROR_NO_MEMORY;
1593      break;
1594    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
1595    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
1596      {
1597	const XML_Char *attVal;
1598	enum XML_Error result
1599	  = storeAttributeValue(parser, encoding, declAttributeIsCdata,
1600				s + encoding->minBytesPerChar,
1601			        next - encoding->minBytesPerChar,
1602			        &dtd.pool);
1603	if (result)
1604	  return result;
1605	attVal = poolStart(&dtd.pool);
1606	poolFinish(&dtd.pool);
1607	if (dtd.complete
1608	    && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
1609	  return XML_ERROR_NO_MEMORY;
1610	break;
1611      }
1612    case XML_ROLE_ENTITY_VALUE:
1613      {
1614	enum XML_Error result = storeEntityValue(parser, s, next);
1615	if (result != XML_ERROR_NONE)
1616	  return result;
1617      }
1618      break;
1619    case XML_ROLE_ENTITY_SYSTEM_ID:
1620      if (declEntity) {
1621	declEntity->systemId = poolStoreString(&dtd.pool, encoding,
1622	                                       s + encoding->minBytesPerChar,
1623	  				       next - encoding->minBytesPerChar);
1624	if (!declEntity->systemId)
1625	  return XML_ERROR_NO_MEMORY;
1626	declEntity->base = dtd.base;
1627	poolFinish(&dtd.pool);
1628      }
1629      break;
1630    case XML_ROLE_ENTITY_NOTATION_NAME:
1631      if (declEntity) {
1632	declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
1633	if (!declEntity->notation)
1634	  return XML_ERROR_NO_MEMORY;
1635	poolFinish(&dtd.pool);
1636	if (unparsedEntityDeclHandler) {
1637	  eventPtr = eventEndPtr = s;
1638	  unparsedEntityDeclHandler(handlerArg,
1639				    declEntity->name,
1640				    declEntity->base,
1641				    declEntity->systemId,
1642				    declEntity->publicId,
1643				    declEntity->notation);
1644	}
1645
1646      }
1647      break;
1648    case XML_ROLE_GENERAL_ENTITY_NAME:
1649      {
1650	const XML_Char *name;
1651	if (XmlPredefinedEntityName(encoding, s, next)) {
1652	  declEntity = 0;
1653	  break;
1654	}
1655	name = poolStoreString(&dtd.pool, encoding, s, next);
1656	if (!name)
1657	  return XML_ERROR_NO_MEMORY;
1658	if (dtd.complete) {
1659	  declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
1660	  if (!declEntity)
1661	    return XML_ERROR_NO_MEMORY;
1662	  if (declEntity->name != name) {
1663	    poolDiscard(&dtd.pool);
1664	    declEntity = 0;
1665	  }
1666	  else
1667	    poolFinish(&dtd.pool);
1668	}
1669	else {
1670	  poolDiscard(&dtd.pool);
1671	  declEntity = 0;
1672	}
1673      }
1674      break;
1675    case XML_ROLE_PARAM_ENTITY_NAME:
1676      declEntity = 0;
1677      break;
1678    case XML_ROLE_NOTATION_NAME:
1679      declNotationPublicId = 0;
1680      declNotationName = 0;
1681      if (notationDeclHandler) {
1682	declNotationName = poolStoreString(&tempPool, encoding, s, next);
1683	if (!declNotationName)
1684	  return XML_ERROR_NO_MEMORY;
1685	poolFinish(&tempPool);
1686      }
1687      break;
1688    case XML_ROLE_NOTATION_PUBLIC_ID:
1689      if (!XmlIsPublicId(encoding, s, next, &eventPtr))
1690	return XML_ERROR_SYNTAX;
1691      if (declNotationName) {
1692	XML_Char *tem = poolStoreString(&tempPool,
1693	                                encoding,
1694					s + encoding->minBytesPerChar,
1695	  				next - encoding->minBytesPerChar);
1696	if (!tem)
1697	  return XML_ERROR_NO_MEMORY;
1698	normalizePublicId(tem);
1699	declNotationPublicId = tem;
1700	poolFinish(&tempPool);
1701      }
1702      break;
1703    case XML_ROLE_NOTATION_SYSTEM_ID:
1704      if (declNotationName && notationDeclHandler) {
1705	const XML_Char *systemId
1706	  = poolStoreString(&tempPool, encoding,
1707			    s + encoding->minBytesPerChar,
1708	  		    next - encoding->minBytesPerChar);
1709	if (!systemId)
1710	  return XML_ERROR_NO_MEMORY;
1711	eventPtr = eventEndPtr = s;
1712	notationDeclHandler(handlerArg,
1713			    declNotationName,
1714			    dtd.base,
1715			    systemId,
1716			    declNotationPublicId);
1717      }
1718      poolClear(&tempPool);
1719      break;
1720    case XML_ROLE_NOTATION_NO_SYSTEM_ID:
1721      if (declNotationPublicId && notationDeclHandler) {
1722	eventPtr = eventEndPtr = s;
1723	notationDeclHandler(handlerArg,
1724			    declNotationName,
1725			    dtd.base,
1726			    0,
1727			    declNotationPublicId);
1728      }
1729      poolClear(&tempPool);
1730      break;
1731    case XML_ROLE_ERROR:
1732      eventPtr = s;
1733      switch (tok) {
1734      case XML_TOK_PARAM_ENTITY_REF:
1735	return XML_ERROR_PARAM_ENTITY_REF;
1736      case XML_TOK_XML_DECL:
1737	return XML_ERROR_MISPLACED_XML_PI;
1738      default:
1739	return XML_ERROR_SYNTAX;
1740      }
1741    case XML_ROLE_GROUP_OPEN:
1742      if (prologState.level >= groupSize) {
1743	if (groupSize)
1744	  groupConnector = realloc(groupConnector, groupSize *= 2);
1745	else
1746	  groupConnector = malloc(groupSize = 32);
1747	if (!groupConnector)
1748	  return XML_ERROR_NO_MEMORY;
1749      }
1750      groupConnector[prologState.level] = 0;
1751      break;
1752    case XML_ROLE_GROUP_SEQUENCE:
1753      if (groupConnector[prologState.level] == '|') {
1754	eventPtr = s;
1755	return XML_ERROR_SYNTAX;
1756      }
1757      groupConnector[prologState.level] = ',';
1758      break;
1759    case XML_ROLE_GROUP_CHOICE:
1760      if (groupConnector[prologState.level] == ',') {
1761	eventPtr = s;
1762	return XML_ERROR_SYNTAX;
1763      }
1764      groupConnector[prologState.level] = '|';
1765      break;
1766    case XML_ROLE_PARAM_ENTITY_REF:
1767      dtd.complete = 0;
1768      break;
1769    case XML_ROLE_NONE:
1770      switch (tok) {
1771      case XML_TOK_PI:
1772	eventPtr = s;
1773	eventEndPtr = next;
1774	if (!reportProcessingInstruction(parser, encoding, s, next))
1775	  return XML_ERROR_NO_MEMORY;
1776	break;
1777      }
1778      break;
1779    }
1780    if (defaultHandler) {
1781      switch (tok) {
1782      case XML_TOK_PI:
1783      case XML_TOK_BOM:
1784      case XML_TOK_XML_DECL:
1785	break;
1786      default:
1787	eventPtr = s;
1788	eventEndPtr = next;
1789	reportDefault(parser, encoding, s, next);
1790      }
1791    }
1792    s = next;
1793  }
1794  /* not reached */
1795}
1796
1797static
1798enum XML_Error epilogProcessor(XML_Parser parser,
1799			       const char *s,
1800			       const char *end,
1801			       const char **nextPtr)
1802{
1803  processor = epilogProcessor;
1804  eventPtr = s;
1805  for (;;) {
1806    const char *next;
1807    int tok = XmlPrologTok(encoding, s, end, &next);
1808    eventEndPtr = next;
1809    switch (tok) {
1810    case XML_TOK_TRAILING_CR:
1811      if (defaultHandler) {
1812	eventEndPtr = end;
1813	reportDefault(parser, encoding, s, end);
1814      }
1815      /* fall through */
1816    case XML_TOK_NONE:
1817      if (nextPtr)
1818	*nextPtr = end;
1819      return XML_ERROR_NONE;
1820    case XML_TOK_PROLOG_S:
1821    case XML_TOK_COMMENT:
1822      if (defaultHandler)
1823	reportDefault(parser, encoding, s, next);
1824      break;
1825    case XML_TOK_PI:
1826      if (!reportProcessingInstruction(parser, encoding, s, next))
1827	return XML_ERROR_NO_MEMORY;
1828      break;
1829    case XML_TOK_INVALID:
1830      eventPtr = next;
1831      return XML_ERROR_INVALID_TOKEN;
1832    case XML_TOK_PARTIAL:
1833      if (nextPtr) {
1834	*nextPtr = s;
1835	return XML_ERROR_NONE;
1836      }
1837      return XML_ERROR_UNCLOSED_TOKEN;
1838    case XML_TOK_PARTIAL_CHAR:
1839      if (nextPtr) {
1840	*nextPtr = s;
1841	return XML_ERROR_NONE;
1842      }
1843      return XML_ERROR_PARTIAL_CHAR;
1844    default:
1845      return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
1846    }
1847    eventPtr = s = next;
1848  }
1849}
1850
1851static
1852enum XML_Error errorProcessor(XML_Parser parser,
1853			      const char *s,
1854			      const char *end,
1855			      const char **nextPtr)
1856{
1857  return errorCode;
1858}
1859
1860static enum XML_Error
1861storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
1862		    const char *ptr, const char *end,
1863		    STRING_POOL *pool)
1864{
1865  enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
1866  if (result)
1867    return result;
1868  if (!isCdata && poolLength(pool) && poolLastChar(pool) == XML_T(' '))
1869    poolChop(pool);
1870  if (!poolAppendChar(pool, XML_T('\0')))
1871    return XML_ERROR_NO_MEMORY;
1872  return XML_ERROR_NONE;
1873}
1874
1875static enum XML_Error
1876appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
1877		     const char *ptr, const char *end,
1878		     STRING_POOL *pool)
1879{
1880  const ENCODING *internalEnc = XmlGetInternalEncoding();
1881  for (;;) {
1882    const char *next;
1883    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
1884    switch (tok) {
1885    case XML_TOK_NONE:
1886      return XML_ERROR_NONE;
1887    case XML_TOK_INVALID:
1888      if (enc == encoding)
1889	eventPtr = next;
1890      return XML_ERROR_INVALID_TOKEN;
1891    case XML_TOK_PARTIAL:
1892      if (enc == encoding)
1893	eventPtr = ptr;
1894      return XML_ERROR_INVALID_TOKEN;
1895    case XML_TOK_CHAR_REF:
1896      {
1897	XML_Char buf[XML_ENCODE_MAX];
1898	int i;
1899	int n = XmlCharRefNumber(enc, ptr);
1900	if (n < 0) {
1901	  if (enc == encoding)
1902	    eventPtr = ptr;
1903      	  return XML_ERROR_BAD_CHAR_REF;
1904	}
1905	if (!isCdata
1906	    && n == 0x20 /* space */
1907	    && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
1908	  break;
1909	n = XmlEncode(n, (ICHAR *)buf);
1910	if (!n) {
1911	  if (enc == encoding)
1912	    eventPtr = ptr;
1913	  return XML_ERROR_BAD_CHAR_REF;
1914	}
1915	for (i = 0; i < n; i++) {
1916	  if (!poolAppendChar(pool, buf[i]))
1917	    return XML_ERROR_NO_MEMORY;
1918	}
1919      }
1920      break;
1921    case XML_TOK_DATA_CHARS:
1922      if (!poolAppend(pool, enc, ptr, next))
1923	return XML_ERROR_NO_MEMORY;
1924      break;
1925      break;
1926    case XML_TOK_TRAILING_CR:
1927      next = ptr + enc->minBytesPerChar;
1928      /* fall through */
1929    case XML_TOK_ATTRIBUTE_VALUE_S:
1930    case XML_TOK_DATA_NEWLINE:
1931      if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
1932	break;
1933      if (!poolAppendChar(pool, XML_T(' ')))
1934	return XML_ERROR_NO_MEMORY;
1935      break;
1936    case XML_TOK_ENTITY_REF:
1937      {
1938	const XML_Char *name;
1939	ENTITY *entity;
1940	XML_Char ch = XmlPredefinedEntityName(enc,
1941					      ptr + enc->minBytesPerChar,
1942					      next - enc->minBytesPerChar);
1943	if (ch) {
1944	  if (!poolAppendChar(pool, ch))
1945  	    return XML_ERROR_NO_MEMORY;
1946	  break;
1947	}
1948	name = poolStoreString(&temp2Pool, enc,
1949			       ptr + enc->minBytesPerChar,
1950			       next - enc->minBytesPerChar);
1951	if (!name)
1952	  return XML_ERROR_NO_MEMORY;
1953	entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1954	poolDiscard(&temp2Pool);
1955	if (!entity) {
1956	  if (dtd.complete) {
1957	    if (enc == encoding)
1958	      eventPtr = ptr;
1959	    return XML_ERROR_UNDEFINED_ENTITY;
1960	  }
1961	}
1962	else if (entity->open) {
1963	  if (enc == encoding)
1964	    eventPtr = ptr;
1965	  return XML_ERROR_RECURSIVE_ENTITY_REF;
1966	}
1967	else if (entity->notation) {
1968	  if (enc == encoding)
1969	    eventPtr = ptr;
1970	  return XML_ERROR_BINARY_ENTITY_REF;
1971	}
1972	else if (!entity->textPtr) {
1973	  if (enc == encoding)
1974	    eventPtr = ptr;
1975  	  return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
1976	}
1977	else {
1978	  enum XML_Error result;
1979	  const XML_Char *textEnd = entity->textPtr + entity->textLen;
1980	  entity->open = 1;
1981	  result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
1982	  entity->open = 0;
1983	  if (result)
1984	    return result;
1985	}
1986      }
1987      break;
1988    default:
1989      abort();
1990    }
1991    ptr = next;
1992  }
1993  /* not reached */
1994}
1995
1996static
1997enum XML_Error storeEntityValue(XML_Parser parser,
1998				const char *entityTextPtr,
1999				const char *entityTextEnd)
2000{
2001  /*const ENCODING *internalEnc = XmlGetInternalEncoding();*/
2002  STRING_POOL *pool = &(dtd.pool);
2003  entityTextPtr += encoding->minBytesPerChar;
2004  entityTextEnd -= encoding->minBytesPerChar;
2005  for (;;) {
2006    const char *next;
2007    int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
2008    switch (tok) {
2009    case XML_TOK_PARAM_ENTITY_REF:
2010      eventPtr = entityTextPtr;
2011      return XML_ERROR_SYNTAX;
2012    case XML_TOK_NONE:
2013      if (declEntity) {
2014	declEntity->textPtr = pool->start;
2015	declEntity->textLen = pool->ptr - pool->start;
2016	poolFinish(pool);
2017      }
2018      else
2019	poolDiscard(pool);
2020      return XML_ERROR_NONE;
2021    case XML_TOK_ENTITY_REF:
2022    case XML_TOK_DATA_CHARS:
2023      if (!poolAppend(pool, encoding, entityTextPtr, next))
2024	return XML_ERROR_NO_MEMORY;
2025      break;
2026    case XML_TOK_TRAILING_CR:
2027      next = entityTextPtr + encoding->minBytesPerChar;
2028      /* fall through */
2029    case XML_TOK_DATA_NEWLINE:
2030      if (pool->end == pool->ptr && !poolGrow(pool))
2031	return XML_ERROR_NO_MEMORY;
2032      *(pool->ptr)++ = XML_T('\n');
2033      break;
2034    case XML_TOK_CHAR_REF:
2035      {
2036	XML_Char buf[XML_ENCODE_MAX];
2037	int i;
2038	int n = XmlCharRefNumber(encoding, entityTextPtr);
2039	if (n < 0) {
2040	  eventPtr = entityTextPtr;
2041	  return XML_ERROR_BAD_CHAR_REF;
2042	}
2043	n = XmlEncode(n, (ICHAR *)buf);
2044	if (!n) {
2045	  eventPtr = entityTextPtr;
2046	  return XML_ERROR_BAD_CHAR_REF;
2047	}
2048	for (i = 0; i < n; i++) {
2049	  if (pool->end == pool->ptr && !poolGrow(pool))
2050	    return XML_ERROR_NO_MEMORY;
2051	  *(pool->ptr)++ = buf[i];
2052	}
2053      }
2054      break;
2055    case XML_TOK_PARTIAL:
2056      eventPtr = entityTextPtr;
2057      return XML_ERROR_INVALID_TOKEN;
2058    case XML_TOK_INVALID:
2059      eventPtr = next;
2060      return XML_ERROR_INVALID_TOKEN;
2061    default:
2062      abort();
2063    }
2064    entityTextPtr = next;
2065  }
2066  /* not reached */
2067}
2068
2069static void
2070normalizeLines(XML_Char *s)
2071{
2072  XML_Char *p;
2073  for (;; s++) {
2074    if (*s == XML_T('\0'))
2075      return;
2076    if (*s == XML_T('\r'))
2077      break;
2078  }
2079  p = s;
2080  do {
2081    if (*s == XML_T('\r')) {
2082      *p++ = XML_T('\n');
2083      if (*++s == XML_T('\n'))
2084        s++;
2085    }
2086    else
2087      *p++ = *s++;
2088  } while (*s);
2089  *p = XML_T('\0');
2090}
2091
2092static int
2093reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2094{
2095  const XML_Char *target;
2096  XML_Char *data;
2097  const char *tem;
2098  if (!processingInstructionHandler) {
2099    if (defaultHandler)
2100      reportDefault(parser, enc, start, end);
2101    return 1;
2102  }
2103  start += enc->minBytesPerChar * 2;
2104  tem = start + XmlNameLength(enc, start);
2105  target = poolStoreString(&tempPool, enc, start, tem);
2106  if (!target)
2107    return 0;
2108  poolFinish(&tempPool);
2109  data = poolStoreString(&tempPool, enc,
2110			XmlSkipS(enc, tem),
2111			end - enc->minBytesPerChar*2);
2112  if (!data)
2113    return 0;
2114  normalizeLines(data);
2115  processingInstructionHandler(handlerArg, target, data);
2116  poolClear(&tempPool);
2117  return 1;
2118}
2119
2120static void
2121reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
2122{
2123  if (MUST_CONVERT(enc, s)) {
2124    for (;;) {
2125      ICHAR *dataPtr = (ICHAR *)dataBuf;
2126      XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2127      if (s == end) {
2128	defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2129	break;
2130      }
2131      if (enc == encoding) {
2132	eventEndPtr = s;
2133	defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2134	eventPtr = s;
2135      }
2136      else
2137	defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2138    }
2139  }
2140  else
2141    defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
2142}
2143
2144
2145static int
2146defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
2147{
2148  DEFAULT_ATTRIBUTE *att;
2149  if (type->nDefaultAtts == type->allocDefaultAtts) {
2150    if (type->allocDefaultAtts == 0) {
2151      type->allocDefaultAtts = 8;
2152      type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2153    }
2154    else {
2155      type->allocDefaultAtts *= 2;
2156      type->defaultAtts = realloc(type->defaultAtts,
2157				  type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2158    }
2159    if (!type->defaultAtts)
2160      return 0;
2161  }
2162  att = type->defaultAtts + type->nDefaultAtts;
2163  att->id = attId;
2164  att->value = value;
2165  att->isCdata = isCdata;
2166  if (!isCdata)
2167    attId->maybeTokenized = 1;
2168  type->nDefaultAtts += 1;
2169  return 1;
2170}
2171
2172static ATTRIBUTE_ID *
2173getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2174{
2175  ATTRIBUTE_ID *id;
2176  const XML_Char *name;
2177  if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2178    return 0;
2179  name = poolStoreString(&dtd.pool, enc, start, end);
2180  if (!name)
2181    return 0;
2182  ++name;
2183  id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
2184  if (!id)
2185    return 0;
2186  if (id->name != name)
2187    poolDiscard(&dtd.pool);
2188  else
2189    poolFinish(&dtd.pool);
2190  return id;
2191}
2192
2193static
2194const XML_Char *getOpenEntityNames(XML_Parser parser)
2195{
2196  HASH_TABLE_ITER iter;
2197
2198  hashTableIterInit(&iter, &(dtd.generalEntities));
2199  for (;;) {
2200    const XML_Char *s;
2201    ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
2202    if (!e)
2203      break;
2204    if (!e->open)
2205      continue;
2206    if (poolLength(&tempPool) > 0 && !poolAppendChar(&tempPool, XML_T(' ')))
2207      return 0;
2208    for (s = e->name; *s; s++)
2209      if (!poolAppendChar(&tempPool, *s))
2210        return 0;
2211  }
2212
2213  if (!poolAppendChar(&tempPool, XML_T('\0')))
2214    return 0;
2215  return tempPool.start;
2216}
2217
2218static
2219int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames)
2220{
2221  const XML_Char *s = openEntityNames;
2222  while (*openEntityNames != XML_T('\0')) {
2223    if (*s == XML_T(' ') || *s == XML_T('\0')) {
2224      ENTITY *e;
2225      if (!poolAppendChar(&tempPool, XML_T('\0')))
2226	return 0;
2227      e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
2228      if (e)
2229	e->open = 1;
2230      if (*s == XML_T(' '))
2231	s++;
2232      openEntityNames = s;
2233      poolDiscard(&tempPool);
2234    }
2235    else {
2236      if (!poolAppendChar(&tempPool, *s))
2237	return 0;
2238      s++;
2239    }
2240  }
2241  return 1;
2242}
2243
2244
2245static
2246void normalizePublicId(XML_Char *publicId)
2247{
2248  XML_Char *p = publicId;
2249  XML_Char *s;
2250  for (s = publicId; *s; s++) {
2251    switch (*s) {
2252    case XML_T(' '):
2253    case XML_T('\r'):
2254    case XML_T('\n'):
2255      if (p != publicId && p[-1] != XML_T(' '))
2256	*p++ = XML_T(' ');
2257      break;
2258    default:
2259      *p++ = *s;
2260    }
2261  }
2262  if (p != publicId && p[-1] == XML_T(' '))
2263    --p;
2264  *p = XML_T('\0');
2265}
2266
2267static int dtdInit(DTD *p)
2268{
2269  poolInit(&(p->pool));
2270  hashTableInit(&(p->generalEntities));
2271  hashTableInit(&(p->elementTypes));
2272  hashTableInit(&(p->attributeIds));
2273  p->complete = 1;
2274  p->base = 0;
2275  return 1;
2276}
2277
2278static void dtdDestroy(DTD *p)
2279{
2280  HASH_TABLE_ITER iter;
2281  hashTableIterInit(&iter, &(p->elementTypes));
2282  for (;;) {
2283    ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2284    if (!e)
2285      break;
2286    if (e->allocDefaultAtts != 0)
2287      free(e->defaultAtts);
2288  }
2289  hashTableDestroy(&(p->generalEntities));
2290  hashTableDestroy(&(p->elementTypes));
2291  hashTableDestroy(&(p->attributeIds));
2292  poolDestroy(&(p->pool));
2293}
2294
2295/* Do a deep copy of the DTD.  Return 0 for out of memory; non-zero otherwise.
2296The new DTD has already been initialized. */
2297
2298static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
2299{
2300  HASH_TABLE_ITER iter;
2301
2302  if (oldDtd->base) {
2303    const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
2304    if (!tem)
2305      return 0;
2306    newDtd->base = tem;
2307  }
2308
2309  hashTableIterInit(&iter, &(oldDtd->attributeIds));
2310
2311  /* Copy the attribute id table. */
2312
2313  for (;;) {
2314    ATTRIBUTE_ID *newA;
2315    const XML_Char *name;
2316    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
2317
2318    if (!oldA)
2319      break;
2320    /* Remember to allocate the scratch byte before the name. */
2321    if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
2322      return 0;
2323    name = poolCopyString(&(newDtd->pool), oldA->name);
2324    if (!name)
2325      return 0;
2326    ++name;
2327    newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
2328    if (!newA)
2329      return 0;
2330    newA->maybeTokenized = oldA->maybeTokenized;
2331  }
2332
2333  /* Copy the element type table. */
2334
2335  hashTableIterInit(&iter, &(oldDtd->elementTypes));
2336
2337  for (;;) {
2338    int i;
2339    ELEMENT_TYPE *newE;
2340    const XML_Char *name;
2341    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2342    if (!oldE)
2343      break;
2344    name = poolCopyString(&(newDtd->pool), oldE->name);
2345    if (!name)
2346      return 0;
2347    newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
2348    if (!newE)
2349      return 0;
2350    newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
2351    if (!newE->defaultAtts)
2352      return 0;
2353    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
2354    for (i = 0; i < newE->nDefaultAtts; i++) {
2355      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
2356      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
2357      newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
2358      if (!newE->defaultAtts[i].value)
2359	return 0;
2360    }
2361  }
2362
2363  /* Copy the entity table. */
2364
2365  hashTableIterInit(&iter, &(oldDtd->generalEntities));
2366
2367  for (;;) {
2368    ENTITY *newE;
2369    const XML_Char *name;
2370    const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
2371    if (!oldE)
2372      break;
2373    name = poolCopyString(&(newDtd->pool), oldE->name);
2374    if (!name)
2375      return 0;
2376    newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
2377    if (!newE)
2378      return 0;
2379    if (oldE->systemId) {
2380      const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
2381      if (!tem)
2382	return 0;
2383      newE->systemId = tem;
2384      if (oldE->base) {
2385	if (oldE->base == oldDtd->base)
2386	  newE->base = newDtd->base;
2387	tem = poolCopyString(&(newDtd->pool), oldE->base);
2388	if (!tem)
2389	  return 0;
2390	newE->base = tem;
2391      }
2392    }
2393    else {
2394      const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
2395      if (!tem)
2396	return 0;
2397      newE->textPtr = tem;
2398      newE->textLen = oldE->textLen;
2399    }
2400    if (oldE->notation) {
2401      const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
2402      if (!tem)
2403	return 0;
2404      newE->notation = tem;
2405    }
2406  }
2407
2408  newDtd->complete = oldDtd->complete;
2409  newDtd->standalone = oldDtd->standalone;
2410  return 1;
2411}
2412
2413static
2414void poolInit(STRING_POOL *pool)
2415{
2416  pool->blocks = 0;
2417  pool->freeBlocks = 0;
2418  pool->start = 0;
2419  pool->ptr = 0;
2420  pool->end = 0;
2421}
2422
2423static
2424void poolClear(STRING_POOL *pool)
2425{
2426  if (!pool->freeBlocks)
2427    pool->freeBlocks = pool->blocks;
2428  else {
2429    BLOCK *p = pool->blocks;
2430    while (p) {
2431      BLOCK *tem = p->next;
2432      p->next = pool->freeBlocks;
2433      pool->freeBlocks = p;
2434      p = tem;
2435    }
2436  }
2437  pool->blocks = 0;
2438  pool->start = 0;
2439  pool->ptr = 0;
2440  pool->end = 0;
2441}
2442
2443static
2444void poolDestroy(STRING_POOL *pool)
2445{
2446  BLOCK *p = pool->blocks;
2447  while (p) {
2448    BLOCK *tem = p->next;
2449    free(p);
2450    p = tem;
2451  }
2452  pool->blocks = 0;
2453  p = pool->freeBlocks;
2454  while (p) {
2455    BLOCK *tem = p->next;
2456    free(p);
2457    p = tem;
2458  }
2459  pool->freeBlocks = 0;
2460  pool->ptr = 0;
2461  pool->start = 0;
2462  pool->end = 0;
2463}
2464
2465static
2466XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
2467		     const char *ptr, const char *end)
2468{
2469  if (!pool->ptr && !poolGrow(pool))
2470    return 0;
2471  for (;;) {
2472    XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
2473    if (ptr == end)
2474      break;
2475    if (!poolGrow(pool))
2476      return 0;
2477  }
2478  return pool->start;
2479}
2480
2481static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
2482{
2483  do {
2484    if (!poolAppendChar(pool, *s))
2485      return 0;
2486  } while (*s++);
2487  s = pool->start;
2488  poolFinish(pool);
2489  return s;
2490}
2491
2492static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
2493{
2494  if (!pool->ptr && !poolGrow(pool))
2495    return 0;
2496  for (; n > 0; --n, s++) {
2497    if (!poolAppendChar(pool, *s))
2498      return 0;
2499
2500  }
2501  s = pool->start;
2502  poolFinish(pool);
2503  return s;
2504}
2505
2506static
2507XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
2508			  const char *ptr, const char *end)
2509{
2510  if (!poolAppend(pool, enc, ptr, end))
2511    return 0;
2512  if (pool->ptr == pool->end && !poolGrow(pool))
2513    return 0;
2514  *(pool->ptr)++ = 0;
2515  return pool->start;
2516}
2517
2518static
2519int poolGrow(STRING_POOL *pool)
2520{
2521  if (pool->freeBlocks) {
2522    if (pool->start == 0) {
2523      pool->blocks = pool->freeBlocks;
2524      pool->freeBlocks = pool->freeBlocks->next;
2525      pool->blocks->next = 0;
2526      pool->start = pool->blocks->s;
2527      pool->end = pool->start + pool->blocks->size;
2528      pool->ptr = pool->start;
2529      return 1;
2530    }
2531    if (pool->end - pool->start < pool->freeBlocks->size) {
2532      BLOCK *tem = pool->freeBlocks->next;
2533      pool->freeBlocks->next = pool->blocks;
2534      pool->blocks = pool->freeBlocks;
2535      pool->freeBlocks = tem;
2536      memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
2537      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
2538      pool->start = pool->blocks->s;
2539      pool->end = pool->start + pool->blocks->size;
2540      return 1;
2541    }
2542  }
2543  if (pool->blocks && pool->start == pool->blocks->s) {
2544    int blockSize = (pool->end - pool->start)*2;
2545    pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
2546    if (!pool->blocks)
2547      return 0;
2548    pool->blocks->size = blockSize;
2549    pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
2550    pool->start = pool->blocks->s;
2551    pool->end = pool->start + blockSize;
2552  }
2553  else {
2554    BLOCK *tem;
2555    int blockSize = pool->end - pool->start;
2556    if (blockSize < INIT_BLOCK_SIZE)
2557      blockSize = INIT_BLOCK_SIZE;
2558    else
2559      blockSize *= 2;
2560    tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
2561    if (!tem)
2562      return 0;
2563    tem->size = blockSize;
2564    tem->next = pool->blocks;
2565    pool->blocks = tem;
2566    memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
2567    pool->ptr = tem->s + (pool->ptr - pool->start);
2568    pool->start = tem->s;
2569    pool->end = tem->s + blockSize;
2570  }
2571  return 1;
2572}
2573