1/*
 * Summary: internal routines exported by the parser.
 * Description: this module exports a number of internal parsing routines;
 *              not all of them are really intended for applications, but
 *              they can prove useful when doing low-level processing.
6 *
7 * Copy: See Copyright for the status of this software.
8 *
9 * Author: Daniel Veillard
10 */
11
12#ifndef __XML_PARSER_INTERNALS_H__
13#define __XML_PARSER_INTERNALS_H__
14
15#include <libxml/xmlversion.h>
16#include <libxml/parser.h>
17#include <libxml/HTMLparser.h>
18#include <libxml/chvalid.h>
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24/**
25 * xmlParserMaxDepth:
26 *
 * Arbitrary depth limit for the XML documents that the parser is
 * allowed to process. This is not a limitation of the parser but a
 * safety boundary feature.
30 */
31XMLPUBVAR unsigned int xmlParserMaxDepth;
32
 /**
  * XML_MAX_NAMELEN:
  *
  * Identifiers can be longer than this, but handling them will be
  * more costly at runtime.
  */
#define XML_MAX_NAMELEN 100
40
/**
 * INPUT_CHUNK:
 *
 * The parser tries to always have that amount of input ready.
 * One of the points of doing so is to provide context when
 * reporting errors.
 */
#define INPUT_CHUNK	250
48
49/************************************************************************
50 *									*
51 * UNICODE version of the macros.      					*
52 *									*
53 ************************************************************************/
/**
 * IS_BYTE_CHAR:
 * @c:  a byte value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20...]
 * any byte character in the accepted range
 *
 * Expands to xmlIsChar_ch(c), i.e. the same expansion as IS_CHAR_CH.
 */
#define IS_BYTE_CHAR(c)	 xmlIsChar_ch(c)
64
/**
 * IS_CHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Expands to xmlIsCharQ(c).
 */
#define IS_CHAR(c)   xmlIsCharQ(c)
76
/**
 * IS_CHAR_CH:
 * @c: an xmlChar (usually an unsigned char)
 *
 * Behaves like IS_CHAR on a single-byte value.
 * Expands to xmlIsChar_ch(c).
 */
#define IS_CHAR_CH(c)  xmlIsChar_ch(c)
84
/**
 * IS_BLANK:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Expands to xmlIsBlankQ(c).
 */
#define IS_BLANK(c)  xmlIsBlankQ(c)
94
/**
 * IS_BLANK_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Behaves like IS_BLANK but with a single-byte argument.
 * Expands to xmlIsBlank_ch(c).
 */
#define IS_BLANK_CH(c)  xmlIsBlank_ch(c)
102
/**
 * IS_BASECHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Expands to xmlIsBaseCharQ(c).
 */
#define IS_BASECHAR(c) xmlIsBaseCharQ(c)
112
/**
 * IS_DIGIT:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [88] Digit ::= ... long list see REC ...
 *
 * Expands to xmlIsDigitQ(c).
 */
#define IS_DIGIT(c) xmlIsDigitQ(c)
122
/**
 * IS_DIGIT_CH:
 * @c:  an xmlChar value (usually an unsigned char)
 *
 * Behaves like IS_DIGIT but with a single-byte argument.
 * Expands to xmlIsDigit_ch(c).
 */
#define IS_DIGIT_CH(c)  xmlIsDigit_ch(c)
130
/**
 * IS_COMBINING:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Expands to xmlIsCombiningQ(c).
 */
#define IS_COMBINING(c) xmlIsCombiningQ(c)
140
/**
 * IS_COMBINING_CH:
 * @c:  an xmlChar (usually an unsigned char)
 *
 * Always false (all combining chars > 0xff).
 * The argument does not appear in the expansion, so it is never
 * evaluated: any side effect in @c is silently dropped.
 */
#define IS_COMBINING_CH(c) 0
148
/**
 * IS_EXTENDER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 *
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Expands to xmlIsExtenderQ(c).
 */
#define IS_EXTENDER(c) xmlIsExtenderQ(c)
161
/**
 * IS_EXTENDER_CH:
 * @c:  an xmlChar value (usually an unsigned char)
 *
 * Behaves like IS_EXTENDER but with a single-byte argument.
 * Expands to xmlIsExtender_ch(c).
 */
#define IS_EXTENDER_CH(c)  xmlIsExtender_ch(c)
169
/**
 * IS_IDEOGRAPHIC:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 *
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Expands to xmlIsIdeographicQ(c).
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
180
/**
 * IS_LETTER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 *
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Built from IS_BASECHAR and IS_IDEOGRAPHIC; @c appears in both
 * sub-checks, so it may be evaluated more than once. Avoid passing
 * an expression with side effects.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
191
/**
 * IS_LETTER_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Macro behaves like IS_LETTER, but only checks base chars:
 * it expands to xmlIsBaseChar_ch(c) and performs no Ideographic test.
 *
 */
#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
200
/**
 * IS_ASCII_LETTER:
 * @c: an xmlChar value
 *
 * Non-zero when @c lies in one of the two code ranges 0x41-0x5a
 * ([A-Z]) or 0x61-0x7a ([a-z]), zero otherwise.
 * @c is expanded several times; do not pass an expression with
 * side effects.
 *
 */
#define IS_ASCII_LETTER(c)						\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)))
210
/**
 * IS_ASCII_DIGIT:
 * @c: an xmlChar value
 *
 * Non-zero when @c lies in the code range 0x30-0x39 ([0-9]),
 * zero otherwise.
 * @c is expanded twice; do not pass an expression with side effects.
 *
 */
#define IS_ASCII_DIGIT(c)	(((c) >= 0x30) && ((c) <= 0x39))
219
/**
 * IS_PUBIDCHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Expands to xmlIsPubidCharQ(c).
 */
#define IS_PUBIDCHAR(c)	xmlIsPubidCharQ(c)
230
/**
 * IS_PUBIDCHAR_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Same as IS_PUBIDCHAR but for a single-byte value.
 * Expands to xmlIsPubidChar_ch(c).
 */
#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
238
/**
 * SKIP_EOL:
 * @p:  a UTF-8 string pointer; must be a modifiable lvalue, and is
 *      expanded several times, so it must have no side effects
 *
 * Skips the end of line chars: a carriage return optionally followed
 * by a line feed, then a line feed optionally followed by a carriage
 * return.
 *
 * The byte values are hexadecimal: CR is 0x0D (decimal 13) and LF is
 * 0x0A (decimal 10). Do not confuse them with 0x13/0x10, which are
 * unrelated control characters.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe as the unbraced body of an
 * if/else; invoke it with a trailing semicolon.
 */
#define SKIP_EOL(p) 							\
    do {								\
	if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }		\
	if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }		\
    } while (0)
248
/**
 * MOVETO_ENDTAG:
 * @p:  a UTF-8 string pointer; must be a modifiable lvalue, and is
 *      expanded several times, so it must have no side effects
 *
 * Skips to the next '>' char, or to the terminating 0 byte if the
 * string contains no '>'. @p is left unchanged when it already points
 * at '>' or at the terminator.
 *
 * Expands to a bare while loop: the semicolon written after the
 * invocation terminates the loop statement.
 */
#define MOVETO_ENDTAG(p)						\
    while ((*(p)) && (*(p) != '>')) (p)++
257
/**
 * MOVETO_STARTTAG:
 * @p:  a UTF-8 string pointer; must be a modifiable lvalue, and is
 *      expanded several times, so it must have no side effects
 *
 * Skips to the next '<' char, or to the terminating 0 byte if the
 * string contains no '<'. @p is left unchanged when it already points
 * at '<' or at the terminator.
 *
 * Expands to a bare while loop: the semicolon written after the
 * invocation terminates the loop statement.
 */
#define MOVETO_STARTTAG(p)						\
    while ((*(p)) && (*(p) != '<')) (p)++
266
267/**
268 * Global variables used for predefined strings.
269 */
270XMLPUBVAR const xmlChar xmlStringText[];
271XMLPUBVAR const xmlChar xmlStringTextNoenc[];
272XMLPUBVAR const xmlChar xmlStringComment[];
273
274/*
275 * Function to finish the work of the macros where needed.
276 */
277XMLPUBFUN int XMLCALL                   xmlIsLetter     (int c);
278
279/**
280 * Parser context.
281 */
282XMLPUBFUN xmlParserCtxtPtr XMLCALL
283			xmlCreateFileParserCtxt	(const char *filename);
284XMLPUBFUN xmlParserCtxtPtr XMLCALL
285			xmlCreateURLParserCtxt	(const char *filename,
286						 int options);
287XMLPUBFUN xmlParserCtxtPtr XMLCALL
288			xmlCreateMemoryParserCtxt(const char *buffer,
289						 int size);
290XMLPUBFUN xmlParserCtxtPtr XMLCALL
291			xmlCreateEntityParserCtxt(const xmlChar *URL,
292						 const xmlChar *ID,
293						 const xmlChar *base);
294XMLPUBFUN int XMLCALL
295			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
296						 xmlCharEncoding enc);
297XMLPUBFUN int XMLCALL
298			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
299					 xmlCharEncodingHandlerPtr handler);
300XMLPUBFUN int XMLCALL
301			xmlSwitchInputEncoding	(xmlParserCtxtPtr ctxt,
302						 xmlParserInputPtr input,
303					 xmlCharEncodingHandlerPtr handler);
304
305#ifdef IN_LIBXML
306/* internal error reporting */
307XMLPUBFUN void XMLCALL
308			__xmlErrEncoding	(xmlParserCtxtPtr ctxt,
309						 xmlParserErrors xmlerr,
310						 const char *msg,
311						 const xmlChar * str1,
312						 const xmlChar * str2);
313#endif
314
315/**
316 * Input Streams.
317 */
318XMLPUBFUN xmlParserInputPtr XMLCALL
319			xmlNewStringInputStream	(xmlParserCtxtPtr ctxt,
320						 const xmlChar *buffer);
321XMLPUBFUN xmlParserInputPtr XMLCALL
322			xmlNewEntityInputStream	(xmlParserCtxtPtr ctxt,
323						 xmlEntityPtr entity);
324XMLPUBFUN int XMLCALL
325			xmlPushInput		(xmlParserCtxtPtr ctxt,
326						 xmlParserInputPtr input);
327XMLPUBFUN xmlChar XMLCALL
328			xmlPopInput		(xmlParserCtxtPtr ctxt);
329XMLPUBFUN void XMLCALL
330			xmlFreeInputStream	(xmlParserInputPtr input);
331XMLPUBFUN xmlParserInputPtr XMLCALL
332			xmlNewInputFromFile	(xmlParserCtxtPtr ctxt,
333						 const char *filename);
334XMLPUBFUN xmlParserInputPtr XMLCALL
335			xmlNewInputStream	(xmlParserCtxtPtr ctxt);
336
337/**
338 * Namespaces.
339 */
340XMLPUBFUN xmlChar * XMLCALL
341			xmlSplitQName		(xmlParserCtxtPtr ctxt,
342						 const xmlChar *name,
343						 xmlChar **prefix);
344
345/**
346 * Generic production rules.
347 */
348XMLPUBFUN const xmlChar * XMLCALL
349			xmlParseName		(xmlParserCtxtPtr ctxt);
350XMLPUBFUN xmlChar * XMLCALL
351			xmlParseNmtoken		(xmlParserCtxtPtr ctxt);
352XMLPUBFUN xmlChar * XMLCALL
353			xmlParseEntityValue	(xmlParserCtxtPtr ctxt,
354						 xmlChar **orig);
355XMLPUBFUN xmlChar * XMLCALL
356			xmlParseAttValue	(xmlParserCtxtPtr ctxt);
357XMLPUBFUN xmlChar * XMLCALL
358			xmlParseSystemLiteral	(xmlParserCtxtPtr ctxt);
359XMLPUBFUN xmlChar * XMLCALL
360			xmlParsePubidLiteral	(xmlParserCtxtPtr ctxt);
361XMLPUBFUN void XMLCALL
362			xmlParseCharData	(xmlParserCtxtPtr ctxt,
363						 int cdata);
364XMLPUBFUN xmlChar * XMLCALL
365			xmlParseExternalID	(xmlParserCtxtPtr ctxt,
366						 xmlChar **publicID,
367						 int strict);
368XMLPUBFUN void XMLCALL
369			xmlParseComment		(xmlParserCtxtPtr ctxt);
370XMLPUBFUN const xmlChar * XMLCALL
371			xmlParsePITarget	(xmlParserCtxtPtr ctxt);
372XMLPUBFUN void XMLCALL
373			xmlParsePI		(xmlParserCtxtPtr ctxt);
374XMLPUBFUN void XMLCALL
375			xmlParseNotationDecl	(xmlParserCtxtPtr ctxt);
376XMLPUBFUN void XMLCALL
377			xmlParseEntityDecl	(xmlParserCtxtPtr ctxt);
378XMLPUBFUN int XMLCALL
379			xmlParseDefaultDecl	(xmlParserCtxtPtr ctxt,
380						 xmlChar **value);
381XMLPUBFUN xmlEnumerationPtr XMLCALL
382			xmlParseNotationType	(xmlParserCtxtPtr ctxt);
383XMLPUBFUN xmlEnumerationPtr XMLCALL
384			xmlParseEnumerationType	(xmlParserCtxtPtr ctxt);
385XMLPUBFUN int XMLCALL
386			xmlParseEnumeratedType	(xmlParserCtxtPtr ctxt,
387						 xmlEnumerationPtr *tree);
388XMLPUBFUN int XMLCALL
389			xmlParseAttributeType	(xmlParserCtxtPtr ctxt,
390						 xmlEnumerationPtr *tree);
391XMLPUBFUN void XMLCALL
392			xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
393XMLPUBFUN xmlElementContentPtr XMLCALL
394			xmlParseElementMixedContentDecl
395						(xmlParserCtxtPtr ctxt,
396						 int inputchk);
397XMLPUBFUN xmlElementContentPtr XMLCALL
398			xmlParseElementChildrenContentDecl
399						(xmlParserCtxtPtr ctxt,
400						 int inputchk);
401XMLPUBFUN int XMLCALL
402			xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
403						 const xmlChar *name,
404						 xmlElementContentPtr *result);
405XMLPUBFUN int XMLCALL
406			xmlParseElementDecl	(xmlParserCtxtPtr ctxt);
407XMLPUBFUN void XMLCALL
408			xmlParseMarkupDecl	(xmlParserCtxtPtr ctxt);
409XMLPUBFUN int XMLCALL
410			xmlParseCharRef		(xmlParserCtxtPtr ctxt);
411XMLPUBFUN xmlEntityPtr XMLCALL
412			xmlParseEntityRef	(xmlParserCtxtPtr ctxt);
413XMLPUBFUN void XMLCALL
414			xmlParseReference	(xmlParserCtxtPtr ctxt);
415XMLPUBFUN void XMLCALL
416			xmlParsePEReference	(xmlParserCtxtPtr ctxt);
417XMLPUBFUN void XMLCALL
418			xmlParseDocTypeDecl	(xmlParserCtxtPtr ctxt);
419#ifdef LIBXML_SAX1_ENABLED
420XMLPUBFUN const xmlChar * XMLCALL
421			xmlParseAttribute	(xmlParserCtxtPtr ctxt,
422						 xmlChar **value);
423XMLPUBFUN const xmlChar * XMLCALL
424			xmlParseStartTag	(xmlParserCtxtPtr ctxt);
425XMLPUBFUN void XMLCALL
426			xmlParseEndTag		(xmlParserCtxtPtr ctxt);
427#endif /* LIBXML_SAX1_ENABLED */
428XMLPUBFUN void XMLCALL
429			xmlParseCDSect		(xmlParserCtxtPtr ctxt);
430XMLPUBFUN void XMLCALL
431			xmlParseContent		(xmlParserCtxtPtr ctxt);
432XMLPUBFUN void XMLCALL
433			xmlParseElement		(xmlParserCtxtPtr ctxt);
434XMLPUBFUN xmlChar * XMLCALL
435			xmlParseVersionNum	(xmlParserCtxtPtr ctxt);
436XMLPUBFUN xmlChar * XMLCALL
437			xmlParseVersionInfo	(xmlParserCtxtPtr ctxt);
438XMLPUBFUN xmlChar * XMLCALL
439			xmlParseEncName		(xmlParserCtxtPtr ctxt);
440XMLPUBFUN const xmlChar * XMLCALL
441			xmlParseEncodingDecl	(xmlParserCtxtPtr ctxt);
442XMLPUBFUN int XMLCALL
443			xmlParseSDDecl		(xmlParserCtxtPtr ctxt);
444XMLPUBFUN void XMLCALL
445			xmlParseXMLDecl		(xmlParserCtxtPtr ctxt);
446XMLPUBFUN void XMLCALL
447			xmlParseTextDecl	(xmlParserCtxtPtr ctxt);
448XMLPUBFUN void XMLCALL
449			xmlParseMisc		(xmlParserCtxtPtr ctxt);
450XMLPUBFUN void XMLCALL
451			xmlParseExternalSubset	(xmlParserCtxtPtr ctxt,
452						 const xmlChar *ExternalID,
453						 const xmlChar *SystemID);
/**
 * XML_SUBSTITUTE_NONE:
 *
 * If no entities need to be substituted.
 */
#define XML_SUBSTITUTE_NONE	0
/**
 * XML_SUBSTITUTE_REF:
 *
 * Whether general entities need to be substituted.
 */
#define XML_SUBSTITUTE_REF	1
/**
 * XML_SUBSTITUTE_PEREF:
 *
 * Whether parameter entities need to be substituted.
 */
#define XML_SUBSTITUTE_PEREF	2
/**
 * XML_SUBSTITUTE_BOTH:
 *
 * Both general and parameter entities need to be substituted.
 * Its value (3) is the bitwise OR of XML_SUBSTITUTE_REF (1) and
 * XML_SUBSTITUTE_PEREF (2).
 */
#define XML_SUBSTITUTE_BOTH 	3
478
479XMLPUBFUN xmlChar * XMLCALL
480		xmlStringDecodeEntities		(xmlParserCtxtPtr ctxt,
481						 const xmlChar *str,
482						 int what,
483						 xmlChar end,
484						 xmlChar  end2,
485						 xmlChar end3);
486XMLPUBFUN xmlChar * XMLCALL
487		xmlStringLenDecodeEntities	(xmlParserCtxtPtr ctxt,
488						 const xmlChar *str,
489						 int len,
490						 int what,
491						 xmlChar end,
492						 xmlChar  end2,
493						 xmlChar end3);
494
495/*
496 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP.
497 */
498XMLPUBFUN int XMLCALL			nodePush		(xmlParserCtxtPtr ctxt,
499						 xmlNodePtr value);
500XMLPUBFUN xmlNodePtr XMLCALL		nodePop			(xmlParserCtxtPtr ctxt);
501XMLPUBFUN int XMLCALL			inputPush		(xmlParserCtxtPtr ctxt,
502						 xmlParserInputPtr value);
503XMLPUBFUN xmlParserInputPtr XMLCALL	inputPop		(xmlParserCtxtPtr ctxt);
504XMLPUBFUN const xmlChar * XMLCALL	namePop			(xmlParserCtxtPtr ctxt);
505XMLPUBFUN int XMLCALL			namePush		(xmlParserCtxtPtr ctxt,
506						 const xmlChar *value);
507
/*
 * Other utilities shared between parser.c and parserInternals.
 */
511XMLPUBFUN int XMLCALL			xmlSkipBlankChars	(xmlParserCtxtPtr ctxt);
512XMLPUBFUN int XMLCALL			xmlStringCurrentChar	(xmlParserCtxtPtr ctxt,
513						 const xmlChar *cur,
514						 int *len);
515XMLPUBFUN void XMLCALL			xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
516XMLPUBFUN int XMLCALL			xmlCheckLanguageID	(const xmlChar *lang);
517
518/*
519 * Really core function shared with HTML parser.
520 */
521XMLPUBFUN int XMLCALL			xmlCurrentChar		(xmlParserCtxtPtr ctxt,
522						 int *len);
523XMLPUBFUN int XMLCALL		xmlCopyCharMultiByte	(xmlChar *out,
524						 int val);
525XMLPUBFUN int XMLCALL			xmlCopyChar		(int len,
526						 xmlChar *out,
527						 int val);
528XMLPUBFUN void XMLCALL			xmlNextChar		(xmlParserCtxtPtr ctxt);
529XMLPUBFUN void XMLCALL			xmlParserInputShrink	(xmlParserInputPtr in);
530
531#ifdef LIBXML_HTML_ENABLED
532/*
533 * Actually comes from the HTML parser but launched from the init stuff.
534 */
535XMLPUBFUN void XMLCALL			htmlInitAutoClose	(void);
536XMLPUBFUN htmlParserCtxtPtr XMLCALL	htmlCreateFileParserCtxt(const char *filename,
537	                                         const char *encoding);
538#endif
539
/*
 * Specific functions to keep track of entity references;
 * used by the XSLT debugger.
 */
544#ifdef LIBXML_LEGACY_ENABLED
/**
 * xmlEntityReferenceFunc:
 * @ent: the entity
 * @firstNode:  the first node in the chunk
 * @lastNode:  the last node in the chunk
 *
 * Callback function used when one needs to be able to track back the
 * provenance of a chunk of nodes inherited from an entity replacement.
 * The callback returns nothing.
 */
typedef	void	(*xmlEntityReferenceFunc)	(xmlEntityPtr ent,
						 xmlNodePtr firstNode,
						 xmlNodePtr lastNode);
557
558XMLPUBFUN void XMLCALL		xmlSetEntityReferenceFunc	(xmlEntityReferenceFunc func);
559
560XMLPUBFUN xmlChar * XMLCALL
561			xmlParseQuotedString	(xmlParserCtxtPtr ctxt);
562XMLPUBFUN void XMLCALL
563                        xmlParseNamespace       (xmlParserCtxtPtr ctxt);
564XMLPUBFUN xmlChar * XMLCALL
565			xmlNamespaceParseNSDef	(xmlParserCtxtPtr ctxt);
566XMLPUBFUN xmlChar * XMLCALL
567			xmlScanName		(xmlParserCtxtPtr ctxt);
568XMLPUBFUN xmlChar * XMLCALL
569			xmlNamespaceParseNCName	(xmlParserCtxtPtr ctxt);
570XMLPUBFUN void XMLCALL	xmlParserHandleReference(xmlParserCtxtPtr ctxt);
571XMLPUBFUN xmlChar * XMLCALL
572			xmlNamespaceParseQName	(xmlParserCtxtPtr ctxt,
573						 xmlChar **prefix);
574/**
575 * Entities
576 */
577XMLPUBFUN xmlChar * XMLCALL
578		xmlDecodeEntities		(xmlParserCtxtPtr ctxt,
579						 int len,
580						 int what,
581						 xmlChar end,
582						 xmlChar  end2,
583						 xmlChar end3);
584XMLPUBFUN void XMLCALL
585			xmlHandleEntity		(xmlParserCtxtPtr ctxt,
586						 xmlEntityPtr entity);
587
588#endif /* LIBXML_LEGACY_ENABLED */
589
590#ifdef IN_LIBXML
591/*
592 * internal only
593 */
594XMLPUBFUN void XMLCALL
595	xmlErrMemory		(xmlParserCtxtPtr ctxt,
596				 const char *extra);
597#endif
598
599#ifdef __cplusplus
600}
601#endif
602#endif /* __XML_PARSER_INTERNALS_H__ */
603