Lines Matching refs:ctxt

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
58 static void htmlParseComment(htmlParserCtxtPtr ctxt);
68 * @ctxt: an HTML parser context
74 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
76 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
79 if (ctxt != NULL) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
85 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
90 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
97 * @ctxt: an HTML parser context
106 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
109 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
112 if (ctxt != NULL)
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
119 if (ctxt != NULL)
120 ctxt->wellFormed = 0;
125 * @ctxt: an HTML parser context
133 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
136 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
139 if (ctxt != NULL)
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
144 if (ctxt != NULL)
145 ctxt->wellFormed = 0;
156 * @ctxt: an HTML parser context
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
166 if (ctxt->nameNr >= ctxt->nameMax) {
167 ctxt->nameMax *= 2;
168 ctxt->nameTab = (const xmlChar * *)
169 xmlRealloc((xmlChar * *)ctxt->nameTab,
170 ctxt->nameMax *
171 sizeof(ctxt->nameTab[0]));
172 if (ctxt->nameTab == NULL) {
173 htmlErrMemory(ctxt, NULL);
177 ctxt->nameTab[ctxt->nameNr] = value;
178 ctxt->name = value;
179 return (ctxt->nameNr++);
183 * @ctxt: an HTML parser context
190 htmlnamePop(htmlParserCtxtPtr ctxt)
194 if (ctxt->nameNr <= 0)
196 ctxt->nameNr--;
197 if (ctxt->nameNr < 0)
199 if (ctxt->nameNr > 0)
200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
202 ctxt->name = NULL;
203 ret = ctxt->nameTab[ctxt->nameNr];
204 ctxt->nameTab[ctxt->nameNr] = NULL;
237 #define UPPER (toupper(*ctxt->input->cur))
239 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
241 #define NXT(val) ctxt->input->cur[(val)]
243 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
245 #define CUR_PTR ctxt->input->cur
247 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
248 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
249 xmlParserInputShrink(ctxt->input)
251 #define GROW if ((ctxt->progressive == 0) && \
252 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
253 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
255 #define CURRENT ((int) (*ctxt->input->cur))
257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)
261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
262 #define CUR ((int) (*ctxt->input->cur))
263 #define NEXT xmlNextChar(ctxt)
265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
266 #define NXT(val) ctxt->input->cur[(val)]
267 #define CUR_PTR ctxt->input->cur
271 if (*(ctxt->input->cur) == '\n') { \
272 ctxt->input->line++; ctxt->input->col = 1; \
273 } else ctxt->input->col++; \
274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
292 * @ctxt: the HTML parser context
305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
306 if (ctxt->instate == XML_PARSER_EOF)
309 if (ctxt->token != 0) {
311 return(ctxt->token);
313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
325 const unsigned char *cur = ctxt->input->cur;
332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
374 return((int) *ctxt->input->cur);
383 if ((int) *ctxt->input->cur < 0x80)
384 return((int) *ctxt->input->cur);
389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
390 ctxt->charset = XML_CHAR_ENCODING_UTF8;
391 return(xmlCurrentChar(ctxt, len));
405 ctxt->input->cur[0], ctxt->input->cur[1],
406 ctxt->input->cur[2], ctxt->input->cur[3]);
407 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
412 ctxt->charset = XML_CHAR_ENCODING_8859_1;
414 return((int) *ctxt->input->cur);
419 * @ctxt: the HTML parser context
427 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
430 while (IS_BLANK_CH(*(ctxt->input->cur))) {
431 if ((*ctxt->input->cur == 0) &&
432 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
433 xmlPopInput(ctxt);
435 if (*(ctxt->input->cur) == '\n') {
436 ctxt->input->line++; ctxt->input->col = 1;
437 } else ctxt->input->col++;
438 ctxt->input->cur++;
439 ctxt->nbChars++;
440 if (*ctxt->input->cur == 0)
441 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1136 * @ctxt: an HTML parser context
1143 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1150 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1152 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1160 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1166 while (!xmlStrEqual(newtag, ctxt->name)) {
1167 info = htmlTagLookup(ctxt->name);
1169 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1171 newtag, ctxt->name);
1173 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1174 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1175 htmlnamePop(ctxt);
1181 * @ctxt: an HTML parser context
1186 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1190 if (ctxt->nameNr == 0)
1192 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1193 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1194 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1195 htmlnamePop(ctxt);
1201 * @ctxt: an HTML parser context
1212 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1214 while ((newtag != NULL) && (ctxt->name != NULL) &&
1215 (htmlCheckAutoClose(newtag, ctxt->name))) {
1216 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1217 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1218 htmlnamePop(ctxt);
1221 htmlAutoCloseOnEnd(ctxt);
1224 while ((newtag == NULL) && (ctxt->name != NULL) &&
1225 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
1226 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
1227 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1228 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1229 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1230 htmlnamePop(ctxt);
1288 * @ctxt: an HTML parser context
1296 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1301 if (ctxt->nameNr <= 0) {
1302 htmlnamePush(ctxt, BAD_CAST"html");
1303 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1304 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1308 if ((ctxt->nameNr <= 1) &&
1319 htmlnamePush(ctxt, BAD_CAST"head");
1320 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1321 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1326 for (i = 0;i < ctxt->nameNr;i++) {
1327 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1330 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1335 htmlnamePush(ctxt, BAD_CAST"body");
1336 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1337 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1343 * @ctxt: an HTML parser context
1353 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1357 if (ctxt == NULL)
1359 tag = ctxt->name;
1361 htmlAutoClose(ctxt, BAD_CAST"p");
1362 htmlCheckImplied(ctxt, BAD_CAST"p");
1363 htmlnamePush(ctxt, BAD_CAST"p");
1364 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1365 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1372 htmlAutoClose(ctxt, BAD_CAST"p");
1373 htmlCheckImplied(ctxt, BAD_CAST"p");
1374 htmlnamePush(ctxt, BAD_CAST"p");
1375 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1376 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1715 htmlErrMemory(ctxt, "growing buffer\n"); \
1978 * @ctxt: an HTML parser context
1984 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
1989 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2031 * @ctxt: an HTML parser context
2040 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2051 if (ctxt->name == NULL)
2053 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2055 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2059 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2060 dtd = xmlGetIntSubset(ctxt->myDoc);
2068 if (ctxt->node == NULL) return(0);
2069 lastChild = xmlGetLastChild(ctxt->node);
2073 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2074 (ctxt->node->content != NULL)) return(0);
2078 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2174 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2178 * @ctxt: an HTML parser context
2187 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2204 return(xmlDictLookup(ctxt->dict, loc, i));
2209 * @ctxt: an HTML parser context
2217 htmlParseName(htmlParserCtxtPtr ctxt) {
2227 in = ctxt->input->cur;
2239 count = in - ctxt->input->cur;
2240 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2241 ctxt->input->cur = in;
2242 ctxt->nbChars += count;
2243 ctxt->input->col += count;
2247 return(htmlParseNameComplex(ctxt));
2251 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2281 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2287 * @ctxt: an HTML parser context
2297 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2311 htmlErrMemory(ctxt, "buffer allocation failed\n");
2327 c = htmlParseCharRef(ctxt);
2348 ent = htmlParseEntityRef(ctxt, &name);
2426 * @ctxt: an HTML parser context
2437 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2442 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
2446 name = htmlParseName(ctxt);
2448 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2463 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2476 * @ctxt: an HTML parser context
2481 * asked for ctxt->replaceEntities != 0
2487 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2492 ret = htmlParseHTMLAttribute(ctxt, '"');
2494 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2500 ret = htmlParseHTMLAttribute(ctxt, '\'');
2502 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2510 ret = htmlParseHTMLAttribute(ctxt, 0);
2512 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2521 * @ctxt: an HTML parser context
2531 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2541 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2553 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2560 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2569 * @ctxt: an HTML parser context
2579 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2590 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2602 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2609 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2618 * @ctxt: an HTML parser context
2638 htmlParseScript(htmlParserCtxtPtr ctxt) {
2648 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2649 if (ctxt->sax->cdataBlock!= NULL) {
2653 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2654 } else if (ctxt->sax->characters != NULL) {
2655 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2659 htmlParseComment(ctxt);
2674 if (ctxt->recovery) {
2675 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
2676 xmlStrlen(ctxt->name)) == 0)
2680 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2682 ctxt->name, NULL);
2694 if (ctxt->sax->cdataBlock!= NULL) {
2698 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2699 } else if (ctxt->sax->characters != NULL) {
2700 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2715 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2716 if (ctxt->sax->cdataBlock!= NULL) {
2720 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2721 } else if (ctxt->sax->characters != NULL) {
2722 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2730 * @ctxt: an HTML parser context
2739 htmlParseCharData(htmlParserCtxtPtr ctxt) {
2746 while (((cur != '<') || (ctxt->token == '<')) &&
2747 ((cur != '&') || (ctxt->token == '&')) &&
2754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2755 if (areBlanks(ctxt, buf, nbchar)) {
2756 if (ctxt->sax->ignorableWhitespace != NULL)
2757 ctxt->sax->ignorableWhitespace(ctxt->userData,
2760 htmlCheckParagraph(ctxt);
2761 if (ctxt->sax->characters != NULL)
2762 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2782 if (areBlanks(ctxt, buf, nbchar)) {
2783 if (ctxt->sax->ignorableWhitespace != NULL)
2784 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2786 htmlCheckParagraph(ctxt);
2787 if (ctxt->sax->characters != NULL)
2788 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2796 ctxt->instate = XML_PARSER_EOF;
2802 * @ctxt: an HTML parser context
2818 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
2826 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2830 URI = htmlParseSystemLiteral(ctxt);
2832 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
2840 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2844 *publicID = htmlParsePubidLiteral(ctxt);
2846 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
2852 URI = htmlParseSystemLiteral(ctxt);
2860 * @ctxt: an XML parser context
2867 htmlParsePI(htmlParserCtxtPtr ctxt) {
2877 state = ctxt->instate;
2878 ctxt->instate = XML_PARSER_PI;
2889 target = htmlParseName(ctxt);
2897 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2898 (ctxt->sax->processingInstruction != NULL))
2899 ctxt->sax->processingInstruction(ctxt->userData,
2901 ctxt->instate = state;
2906 htmlErrMemory(ctxt, NULL);
2907 ctxt->instate = state;
2912 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2924 htmlErrMemory(ctxt, NULL);
2926 ctxt->instate = state;
2947 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
2955 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2956 (ctxt->sax->processingInstruction != NULL))
2957 ctxt->sax->processingInstruction(ctxt->userData,
2962 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
2965 ctxt->instate = state;
2971 * @ctxt: an HTML parser context
2978 htmlParseComment(htmlParserCtxtPtr ctxt) {
2993 state = ctxt->instate;
2994 ctxt->instate = XML_PARSER_COMMENT;
2999 htmlErrMemory(ctxt, "buffer allocation failed\n");
3000 ctxt->instate = state;
3019 htmlErrMemory(ctxt, "growing buffer failed\n");
3020 ctxt->instate = state;
3040 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3045 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3046 (!ctxt->disableSAX))
3047 ctxt->sax->comment(ctxt->userData, buf);
3050 ctxt->instate = state;
3055 * @ctxt: an HTML parser context
3065 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3068 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3069 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3085 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3100 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3110 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3119 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3129 * @ctxt: an HTML parser context
3138 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3153 name = htmlParseName(ctxt);
3155 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3168 URI = htmlParseExternalID(ctxt, &ExternalID);
3175 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3184 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3185 (!ctxt->disableSAX))
3186 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3197 * @ctxt: an HTML parser context
3217 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3222 name = htmlParseHTMLName(ctxt);
3224 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3236 val = htmlParseAttValue(ctxt);
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->warning(ctxt->userData,
3251 * @ctxt: an HTML parser context
3260 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3263 if ((ctxt == NULL) || (attvalue == NULL))
3267 if (ctxt->input->encoding != NULL)
3284 if (ctxt->input->encoding != NULL)
3285 xmlFree((xmlChar *) ctxt->input->encoding);
3286 ctxt->input->encoding = xmlStrdup(encoding);
3293 xmlSwitchEncoding(ctxt, enc);
3294 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3301 xmlSwitchToEncoding(ctxt, handler);
3302 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3304 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3308 if ((ctxt->input->buf != NULL) &&
3309 (ctxt->input->buf->encoder != NULL) &&
3310 (ctxt->input->buf->raw != NULL) &&
3311 (ctxt->input->buf->buffer != NULL)) {
3318 processed = ctxt->input->cur - ctxt->input->base;
3319 xmlBufferShrink(ctxt->input->buf->buffer, processed);
3320 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3321 ctxt->input->buf->buffer,
3322 ctxt->input->buf->raw);
3324 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3328 ctxt->input->base =
3329 ctxt->input->cur = ctxt->input->buf->buffer->content;
3336 * @ctxt: an HTML parser context
3342 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3348 if ((ctxt == NULL) || (atts == NULL))
3363 htmlCheckEncoding(ctxt, content);
3369 * @ctxt: an HTML parser context
3388 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3398 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3399 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3406 atts = ctxt->atts;
3407 maxatts = ctxt->maxatts;
3410 name = htmlParseHTMLName(ctxt);
3412 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3426 htmlAutoClose(ctxt, name);
3431 htmlCheckImplied(ctxt, name);
3437 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3438 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3443 if ((ctxt->nameNr != 1) &&
3445 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3452 for (indx = 0;indx < ctxt->nameNr;indx++) {
3453 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3454 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3473 long cons = ctxt->nbChars;
3476 attname = htmlParseAttribute(ctxt, &attvalue);
3484 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3500 htmlErrMemory(ctxt, NULL);
3505 ctxt->atts = atts;
3506 ctxt->maxatts = maxatts;
3514 htmlErrMemory(ctxt, NULL);
3520 ctxt->atts = atts;
3521 ctxt->maxatts = maxatts;
3541 if (cons == ctxt->nbChars) {
3542 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3553 htmlCheckMeta(ctxt, atts);
3558 htmlnamePush(ctxt, name);
3559 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3561 ctxt->sax->startElement(ctxt->userData, name, atts);
3563 ctxt->sax->startElement(ctxt->userData, name, NULL);
3578 * @ctxt: an HTML parser context
3592 htmlParseEndTag(htmlParserCtxtPtr ctxt)
3599 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3605 name = htmlParseHTMLName(ctxt);
3614 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3616 if (ctxt->recovery) {
3632 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
3633 if (xmlStrEqual(name, ctxt->nameTab[i]))
3637 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3647 htmlAutoCloseOnClose(ctxt, name);
3654 if (!xmlStrEqual(name, ctxt->name)) {
3655 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
3656 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3658 name, ctxt->name);
3665 oldname = ctxt->name;
3667 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3668 ctxt->sax->endElement(ctxt->userData, name);
3669 htmlnamePop(ctxt);
3681 * @ctxt: an HTML parser context
3688 htmlParseReference(htmlParserCtxtPtr ctxt) {
3698 c = htmlParseCharRef(ctxt);
3712 htmlCheckParagraph(ctxt);
3713 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3714 ctxt->sax->characters(ctxt->userData, out, i);
3716 ent = htmlParseEntityRef(ctxt, &name);
3718 htmlCheckParagraph(ctxt);
3719 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3720 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3724 htmlCheckParagraph(ctxt);
3725 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
3726 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3727 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
3728 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
3749 htmlCheckParagraph(ctxt);
3750 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3751 ctxt->sax->characters(ctxt->userData, out, i);
3758 * @ctxt: an HTML parser context
3764 htmlParseContent(htmlParserCtxtPtr ctxt) {
3768 currentNode = xmlStrdup(ctxt->name);
3769 depth = ctxt->nameNr;
3771 long cons = ctxt->nbChars;
3778 if (htmlParseEndTag(ctxt) &&
3779 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
3791 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
3792 (!xmlStrEqual(currentNode, ctxt->name)))
3803 htmlParseScript(ctxt);
3813 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3816 htmlParseDocTypeDecl(ctxt);
3824 htmlParseComment(ctxt);
3831 htmlParsePI(ctxt);
3838 htmlParseElement(ctxt);
3846 htmlParseReference(ctxt);
3853 htmlAutoCloseOnEnd(ctxt);
3861 htmlParseCharData(ctxt);
3864 if (cons == ctxt->nbChars) {
3865 if (ctxt->node != NULL) {
3866 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3880 * @ctxt: an HTML parser context
3886 __htmlParseContent(void *ctxt) {
3887 if (ctxt != NULL)
3888 htmlParseContent((htmlParserCtxtPtr) ctxt);
3893 * @ctxt: an HTML parser context
3903 htmlParseElement(htmlParserCtxtPtr ctxt) {
3912 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3913 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3918 if (ctxt->record_info) {
3919 node_info.begin_pos = ctxt->input->consumed +
3920 (CUR_PTR - ctxt->input->base);
3921 node_info.begin_line = ctxt->input->line;
3924 failed = htmlParseStartTag(ctxt);
3925 name = ctxt->name;
3937 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
3946 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3947 ctxt->sax->endElement(ctxt->userData, name);
3948 htmlnamePop(ctxt);
3955 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3961 if (xmlStrEqual(name, ctxt->name)) {
3962 nodePop(ctxt);
3963 htmlnamePop(ctxt);
3969 if (ctxt->record_info) {
3970 node_info.end_pos = ctxt->input->consumed +
3971 (CUR_PTR - ctxt->input->base);
3972 node_info.end_line = ctxt->input->line;
3973 node_info.node = ctxt->node;
3974 xmlParserAddNodeInfo(ctxt, &node_info);
3983 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3984 ctxt->sax->endElement(ctxt->userData, name);
3985 htmlnamePop(ctxt);
3992 currentNode = xmlStrdup(ctxt->name);
3993 depth = ctxt->nameNr;
3995 oldptr = ctxt->input->cur;
3996 htmlParseContent(ctxt);
3997 if (oldptr==ctxt->input->cur) break;
3998 if (ctxt->nameNr < depth) break;
4004 if ( currentNode != NULL && ctxt->record_info ) {
4005 node_info.end_pos = ctxt->input->consumed +
4006 (CUR_PTR - ctxt->input->base);
4007 node_info.end_line = ctxt->input->line;
4008 node_info.node = ctxt->node;
4009 xmlParserAddNodeInfo(ctxt, &node_info);
4012 htmlAutoCloseOnEnd(ctxt);
4021 * @ctxt: an HTML parser context
4031 htmlParseDocument(htmlParserCtxtPtr ctxt) {
4038 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4039 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4043 ctxt->html = 1;
4048 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4049 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4056 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4060 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4061 ctxt->sax->startDocument(ctxt->userData);
4070 htmlParseComment(ctxt);
4071 htmlParsePI(ctxt);
4085 htmlParseDocTypeDecl(ctxt);
4095 htmlParseComment(ctxt);
4096 htmlParsePI(ctxt);
4103 htmlParseContent(ctxt);
4109 htmlAutoCloseOnEnd(ctxt);
4115 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4116 ctxt->sax->endDocument(ctxt->userData);
4118 if (ctxt->myDoc != NULL) {
4119 dtd = xmlGetIntSubset(ctxt->myDoc);
4121 ctxt->myDoc->intSubset =
4122 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4126 if (! ctxt->wellFormed) return(-1);
4139 * @ctxt: an HTML parser context
4147 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4151 if (ctxt == NULL) return(-1);
4152 memset(ctxt, 0, sizeof(htmlParserCtxt));
4154 ctxt->dict = xmlDictCreate();
4155 if (ctxt->dict == NULL) {
4168 ctxt->inputTab = (htmlParserInputPtr *)
4170 if (ctxt->inputTab == NULL) {
4172 ctxt->inputNr = 0;
4173 ctxt->inputMax = 0;
4174 ctxt->input = NULL;
4177 ctxt->inputNr = 0;
4178 ctxt->inputMax = 5;
4179 ctxt->input = NULL;
4180 ctxt->version = NULL;
4181 ctxt->encoding = NULL;
4182 ctxt->standalone = -1;
4183 ctxt->instate = XML_PARSER_START;
4186 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4187 if (ctxt->nodeTab == NULL) {
4189 ctxt->nodeNr = 0;
4190 ctxt->nodeMax = 0;
4191 ctxt->node = NULL;
4192 ctxt->inputNr = 0;
4193 ctxt->inputMax = 0;
4194 ctxt->input = NULL;
4197 ctxt->nodeNr = 0;
4198 ctxt->nodeMax = 10;
4199 ctxt->node = NULL;
4202 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4203 if (ctxt->nameTab == NULL) {
4205 ctxt->nameNr = 0;
4206 ctxt->nameMax = 10;
4207 ctxt->name = NULL;
4208 ctxt->nodeNr = 0;
4209 ctxt->nodeMax = 0;
4210 ctxt->node = NULL;
4211 ctxt->inputNr = 0;
4212 ctxt->inputMax = 0;
4213 ctxt->input = NULL;
4216 ctxt->nameNr = 0;
4217 ctxt->nameMax = 10;
4218 ctxt->name = NULL;
4220 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4222 ctxt->sax = sax;
4225 ctxt->userData = ctxt;
4226 ctxt->myDoc = NULL;
4227 ctxt->wellFormed = 1;
4228 ctxt->replaceEntities = 0;
4229 ctxt->linenumbers = xmlLineNumbersDefaultValue;
4230 ctxt->html = 1;
4231 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4232 ctxt->vctxt.userData = ctxt;
4233 ctxt->vctxt.error = xmlParserValidityError;
4234 ctxt->vctxt.warning = xmlParserValidityWarning;
4235 ctxt->record_info = 0;
4236 ctxt->validate = 0;
4237 ctxt->nbChars = 0;
4238 ctxt->checkIndex = 0;
4239 ctxt->catalogs = NULL;
4240 xmlInitNodeInfoSeq(&ctxt->node_seq);
4246 * @ctxt: an HTML parser context
4249 * document in ctxt->myDoc is not freed.
4253 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4255 xmlFreeParserCtxt(ctxt);
4269 xmlParserCtxtPtr ctxt;
4271 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4272 if (ctxt == NULL) {
4276 memset(ctxt, 0, sizeof(xmlParserCtxt));
4277 if (htmlInitParserCtxt(ctxt) < 0) {
4278 htmlFreeParserCtxt(ctxt);
4281 return(ctxt);
4295 xmlParserCtxtPtr ctxt;
4304 ctxt = htmlNewParserCtxt();
4305 if (ctxt == NULL)
4311 input = xmlNewInputStream(ctxt);
4313 xmlFreeParserCtxt(ctxt);
4323 inputPush(ctxt, input);
4324 return(ctxt);
4341 htmlParserCtxtPtr ctxt;
4346 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
4352 if (ctxt->input->encoding != NULL)
4353 xmlFree((xmlChar *) ctxt->input->encoding);
4354 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
4361 xmlSwitchEncoding(ctxt, enc);
4362 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4363 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4373 xmlSwitchToEncoding(ctxt, handler);
4375 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4381 return(ctxt);
4393 * @ctxt: an HTML parser context
4401 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4410 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
4417 in = ctxt->input;
4421 if (ctxt->checkIndex > base)
4422 base = ctxt->checkIndex;
4459 ctxt->checkIndex = 0;
4477 ctxt->checkIndex = base;
4494 * @ctxt: an HTML parser context
4502 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
4509 switch (ctxt->instate) {
4563 in = ctxt->input;
4570 htmlAutoCloseOnEnd(ctxt);
4571 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
4575 ctxt->instate = XML_PARSER_EOF;
4576 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4577 ctxt->sax->endDocument(ctxt->userData);
4588 switch (ctxt->instate) {
4606 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4607 ctxt->sax->setDocumentLocator(ctxt->userData,
4609 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
4610 (!ctxt->disableSAX))
4611 ctxt->sax->startDocument(ctxt->userData);
4621 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4627 htmlParseDocTypeDecl(ctxt);
4628 ctxt->instate = XML_PARSER_PROLOG;
4634 ctxt->instate = XML_PARSER_MISC;
4654 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4660 htmlParseComment(ctxt);
4661 ctxt->instate = XML_PARSER_MISC;
4664 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4670 htmlParsePI(ctxt);
4671 ctxt->instate = XML_PARSER_MISC;
4678 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4684 htmlParseDocTypeDecl(ctxt);
4685 ctxt->instate = XML_PARSER_PROLOG;
4694 ctxt->instate = XML_PARSER_START_TAG;
4714 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4720 htmlParseComment(ctxt);
4721 ctxt->instate = XML_PARSER_PROLOG;
4724 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4730 htmlParsePI(ctxt);
4731 ctxt->instate = XML_PARSER_PROLOG;
4736 ctxt->instate = XML_PARSER_START_TAG;
4752 htmlParseCharData(ctxt);
4761 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4767 htmlParseComment(ctxt);
4768 ctxt->instate = XML_PARSER_EPILOG;
4771 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4777 htmlParsePI(ctxt);
4778 ctxt->instate = XML_PARSER_EPILOG;
4783 ctxt->errNo = XML_ERR_DOCUMENT_END;
4784 ctxt->wellFormed = 0;
4785 ctxt->instate = XML_PARSER_EOF;
4790 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4791 ctxt->sax->endDocument(ctxt->userData);
4804 ctxt->instate = XML_PARSER_CONTENT;
4812 ctxt->instate = XML_PARSER_END_TAG;
4813 ctxt->checkIndex = 0;
4821 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4824 failed = htmlParseStartTag(ctxt);
4825 name = ctxt->name;
4838 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4847 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4848 ctxt->sax->endElement(ctxt->userData, name);
4849 htmlnamePop(ctxt);
4850 ctxt->instate = XML_PARSER_CONTENT;
4861 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4868 if (xmlStrEqual(name, ctxt->name)) {
4869 nodePop(ctxt);
4870 htmlnamePop(ctxt);
4873 ctxt->instate = XML_PARSER_CONTENT;
4885 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4886 ctxt->sax->endElement(ctxt->userData, name);
4887 htmlnamePop(ctxt);
4889 ctxt->instate = XML_PARSER_CONTENT;
4901 if (ctxt->token != 0) {
4904 chr[0] = (xmlChar) ctxt->token;
4905 htmlCheckParagraph(ctxt);
4906 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4907 ctxt->sax->characters(ctxt->userData, chr, 1);
4908 ctxt->token = 0;
4909 ctxt->checkIndex = 0;
4914 if (ctxt->sax != NULL) {
4916 if (ctxt->sax->ignorableWhitespace != NULL)
4917 ctxt->sax->ignorableWhitespace(
4918 ctxt->userData, &cur, 1);
4920 htmlCheckParagraph(ctxt);
4921 if (ctxt->sax->characters != NULL)
4922 ctxt->sax->characters(
4923 ctxt->userData, &cur, 1);
4926 ctxt->token = 0;
4927 ctxt->checkIndex = 0;
4936 cons = ctxt->nbChars;
4937 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
4938 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
4943 (htmlParseLookupSequence(ctxt, '<', '/', 0, 0) < 0))
4945 htmlParseScript(ctxt);
4947 ctxt->instate = XML_PARSER_END_TAG;
4948 ctxt->checkIndex = 0;
4965 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4967 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4970 htmlParseDocTypeDecl(ctxt);
4975 ctxt, '-', '-', '>', 1) < 0))
4981 htmlParseComment(ctxt);
4982 ctxt->instate = XML_PARSER_CONTENT;
4985 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4991 htmlParsePI(ctxt);
4992 ctxt->instate = XML_PARSER_CONTENT;
4996 ctxt->instate = XML_PARSER_END_TAG;
4997 ctxt->checkIndex = 0;
5004 ctxt->instate = XML_PARSER_START_TAG;
5005 ctxt->checkIndex = 0;
5013 (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))
5020 htmlParseReference(ctxt);
5029 (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
5031 ctxt->checkIndex = 0;
5036 htmlParseCharData(ctxt);
5039 if (cons == ctxt->nbChars) {
5040 if (ctxt->node != NULL) {
5041 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5055 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5057 htmlParseEndTag(ctxt);
5058 if (ctxt->nameNr == 0) {
5059 ctxt->instate = XML_PARSER_EPILOG;
5061 ctxt->instate = XML_PARSER_CONTENT;
5063 ctxt->checkIndex = 0;
5070 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5073 ctxt->instate = XML_PARSER_CONTENT;
5074 ctxt->checkIndex = 0;
5081 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5084 ctxt->instate = XML_PARSER_CONTENT;
5085 ctxt->checkIndex = 0;
5092 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5095 ctxt->instate = XML_PARSER_CONTENT;
5096 ctxt->checkIndex = 0;
5103 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5106 ctxt->instate = XML_PARSER_CONTENT;
5107 ctxt->checkIndex = 0;
5114 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5117 ctxt->instate = XML_PARSER_CONTENT;
5118 ctxt->checkIndex = 0;
5125 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5128 ctxt->instate = XML_PARSER_CONTENT;
5129 ctxt->checkIndex = 0;
5136 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5139 ctxt->instate = XML_PARSER_START_TAG;
5140 ctxt->checkIndex = 0;
5147 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5150 ctxt->instate = XML_PARSER_CONTENT;
5151 ctxt->checkIndex = 0;
5158 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5161 ctxt->instate = XML_PARSER_CONTENT;
5162 ctxt->checkIndex = 0;
5169 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5172 ctxt->instate = XML_PARSER_CONTENT;
5173 ctxt->checkIndex = 0;
5184 htmlAutoCloseOnEnd(ctxt);
5185 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5189 ctxt->instate = XML_PARSER_EOF;
5190 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5191 ctxt->sax->endDocument(ctxt->userData);
5194 if ((ctxt->myDoc != NULL) &&
5195 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5196 (ctxt->instate == XML_PARSER_EPILOG))) {
5198 dtd = xmlGetIntSubset(ctxt->myDoc);
5200 ctxt->myDoc->intSubset =
5201 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5213 * @ctxt: an HTML parser context
5223 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5225 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5226 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5230 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5231 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5232 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5233 int cur = ctxt->input->cur - ctxt->input->base;
5236 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5238 ctxt->errNo = XML_PARSER_EOF;
5239 ctxt->disableSAX = 1;
5242 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5243 ctxt->input->cur = ctxt->input->base + cur;
5244 ctxt->input->end =
5245 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5251 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5252 htmlParseTryOrFinish(ctxt, terminate);
5254 } else if (ctxt->instate != XML_PARSER_EOF) {
5255 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5256 xmlParserInputBufferPtr in = ctxt->input->buf;
5263 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5270 htmlParseTryOrFinish(ctxt, terminate);
5272 if ((ctxt->instate != XML_PARSER_EOF) &&
5273 (ctxt->instate != XML_PARSER_EPILOG) &&
5274 (ctxt->instate != XML_PARSER_MISC)) {
5275 ctxt->errNo = XML_ERR_DOCUMENT_END;
5276 ctxt->wellFormed = 0;
5278 if (ctxt->instate != XML_PARSER_EOF) {
5279 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5280 ctxt->sax->endDocument(ctxt->userData);
5282 ctxt->instate = XML_PARSER_EOF;
5284 return((xmlParserErrors) ctxt->errNo);
5312 htmlParserCtxtPtr ctxt;
5321 ctxt = htmlNewParserCtxt();
5322 if (ctxt == NULL) {
5327 ctxt->charset=XML_CHAR_ENCODING_UTF8;
5329 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
5330 xmlFree(ctxt->sax);
5331 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
5332 if (ctxt->sax == NULL) {
5334 xmlFree(ctxt);
5337 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
5339 ctxt->userData = user_data;
5342 ctxt->directory = NULL;
5344 ctxt->directory = xmlParserGetDirectory(filename);
5347 inputStream = htmlNewInputStream(ctxt);
5349 xmlFreeParserCtxt(ctxt);
5365 inputPush(ctxt, inputStream);
5367 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5368 (ctxt->input->buf != NULL)) {
5369 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5370 int cur = ctxt->input->cur - ctxt->input->base;
5372 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5374 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5375 ctxt->input->cur = ctxt->input->base + cur;
5376 ctxt->input->end =
5377 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5383 return(ctxt);
5405 htmlParserCtxtPtr ctxt;
5412 ctxt = htmlCreateDocParserCtxt(cur, encoding);
5413 if (ctxt == NULL) return(NULL);
5415 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
5416 ctxt->sax = sax;
5417 ctxt->userData = userData;
5420 htmlParseDocument(ctxt);
5421 ret = ctxt->myDoc;
5423 ctxt->sax = NULL;
5424 ctxt->userData = NULL;
5426 htmlFreeParserCtxt(ctxt);
5461 htmlParserCtxtPtr ctxt;
5470 ctxt = htmlNewParserCtxt();
5471 if (ctxt == NULL) {
5481 xmlFreeParserCtxt(ctxt);
5485 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
5488 xmlFreeParserCtxt(ctxt);
5492 inputPush(ctxt, inputStream);
5500 htmlCheckEncoding (ctxt, content);
5505 return(ctxt);
5528 htmlParserCtxtPtr ctxt;
5533 ctxt = htmlCreateFileParserCtxt(filename, encoding);
5534 if (ctxt == NULL) return(NULL);
5536 oldsax = ctxt->sax;
5537 ctxt->sax = sax;
5538 ctxt->userData = userData;
5541 htmlParseDocument(ctxt);
5543 ret = ctxt->myDoc;
5545 ctxt->sax = oldsax;
5546 ctxt->userData = NULL;
5548 htmlFreeParserCtxt(ctxt);
5717 * @ctxt: an HTML parser context
5722 htmlCtxtReset(htmlParserCtxtPtr ctxt)
5727 if (ctxt == NULL)
5730 dict = ctxt->dict;
5732 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
5735 ctxt->inputNr = 0;
5736 ctxt->input = NULL;
5738 ctxt->spaceNr = 0;
5739 if (ctxt->spaceTab != NULL) {
5740 ctxt->spaceTab[0] = -1;
5741 ctxt->space = &ctxt->spaceTab[0];
5743 ctxt->space = NULL;
5747 ctxt->nodeNr = 0;
5748 ctxt->node = NULL;
5750 ctxt->nameNr = 0;
5751 ctxt->name = NULL;
5753 DICT_FREE(ctxt->version);
5754 ctxt->version = NULL;
5755 DICT_FREE(ctxt->encoding);
5756 ctxt->encoding = NULL;
5757 DICT_FREE(ctxt->directory);
5758 ctxt->directory = NULL;
5759 DICT_FREE(ctxt->extSubURI);
5760 ctxt->extSubURI = NULL;
5761 DICT_FREE(ctxt->extSubSystem);
5762 ctxt->extSubSystem = NULL;
5763 if (ctxt->myDoc != NULL)
5764 xmlFreeDoc(ctxt->myDoc);
5765 ctxt->myDoc = NULL;
5767 ctxt->standalone = -1;
5768 ctxt->hasExternalSubset = 0;
5769 ctxt->hasPErefs = 0;
5770 ctxt->html = 1;
5771 ctxt->external = 0;
5772 ctxt->instate = XML_PARSER_START;
5773 ctxt->token = 0;
5775 ctxt->wellFormed = 1;
5776 ctxt->nsWellFormed = 1;
5777 ctxt->valid = 1;
5778 ctxt->vctxt.userData = ctxt;
5779 ctxt->vctxt.error = xmlParserValidityError;
5780 ctxt->vctxt.warning = xmlParserValidityWarning;
5781 ctxt->record_info = 0;
5782 ctxt->nbChars = 0;
5783 ctxt->checkIndex = 0;
5784 ctxt->inSubset = 0;
5785 ctxt->errNo = XML_ERR_OK;
5786 ctxt->depth = 0;
5787 ctxt->charset = XML_CHAR_ENCODING_UTF8;
5788 ctxt->catalogs = NULL;
5789 xmlInitNodeInfoSeq(&ctxt->node_seq);
5791 if (ctxt->attsDefault != NULL) {
5792 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
5793 ctxt->attsDefault = NULL;
5795 if (ctxt->attsSpecial != NULL) {
5796 xmlHashFree(ctxt->attsSpecial, NULL);
5797 ctxt->attsSpecial = NULL;
5803 * @ctxt: an HTML parser context
5812 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
5814 if (ctxt == NULL)
5818 ctxt->sax->warning = NULL;
5819 ctxt->vctxt.warning = NULL;
5821 ctxt->options |= XML_PARSE_NOWARNING;
5824 ctxt->sax->error = NULL;
5825 ctxt->vctxt.error = NULL;
5826 ctxt->sax->fatalError = NULL;
5828 ctxt->options |= XML_PARSE_NOERROR;
5831 ctxt->pedantic = 1;
5833 ctxt->options |= XML_PARSE_PEDANTIC;
5835 ctxt->pedantic = 0;
5837 ctxt->keepBlanks = 0;
5838 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
5840 ctxt->options |= XML_PARSE_NOBLANKS;
5842 ctxt->keepBlanks = 1;
5844 ctxt->recovery = 1;
5846 ctxt->recovery = 0;
5848 ctxt->options |= HTML_PARSE_COMPACT;
5851 ctxt->dictNames = 0;
5857 * @ctxt: an HTML parser context
5868 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
5873 htmlCtxtUseOptions(ctxt, options);
5874 ctxt->html = 1;
5880 xmlSwitchToEncoding(ctxt, hdlr);
5882 if ((URL != NULL) && (ctxt->input != NULL) &&
5883 (ctxt->input->filename == NULL))
5884 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
5885 htmlParseDocument(ctxt);
5886 ret = ctxt->myDoc;
5887 ctxt->myDoc = NULL;
5889 if ((ctxt->dictNames) &&
5891 (ret->dict == ctxt->dict))
5892 ctxt->dict = NULL;
5893 xmlFreeParserCtxt(ctxt);
5912 htmlParserCtxtPtr ctxt;
5917 ctxt = xmlCreateDocParserCtxt(cur);
5918 if (ctxt == NULL)
5920 return (htmlDoRead(ctxt, URL, encoding, options, 0));
5936 htmlParserCtxtPtr ctxt;
5938 ctxt = htmlCreateFileParserCtxt(filename, encoding);
5939 if (ctxt == NULL)
5941 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
5959 htmlParserCtxtPtr ctxt;
5961 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
5962 if (ctxt == NULL)
5964 if (ctxt->sax != NULL)
5965 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
5966 return (htmlDoRead(ctxt, URL, encoding, options, 0));
5983 htmlParserCtxtPtr ctxt;
5993 ctxt = xmlNewParserCtxt();
5994 if (ctxt == NULL) {
5998 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6001 xmlFreeParserCtxt(ctxt);
6004 inputPush(ctxt, stream);
6005 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6025 htmlParserCtxtPtr ctxt;
6036 ctxt = xmlNewParserCtxt();
6037 if (ctxt == NULL) {
6041 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6044 xmlFreeParserCtxt(ctxt);
6047 inputPush(ctxt, stream);
6048 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6053 * @ctxt: an HTML parser context
6060 * This reuses the existing @ctxt parser context
6065 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6072 if (ctxt == NULL)
6075 htmlCtxtReset(ctxt);
6077 stream = xmlNewStringInputStream(ctxt, cur);
6081 inputPush(ctxt, stream);
6082 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6087 * @ctxt: an HTML parser context
6093 * This reuses the existing @ctxt parser context
6098 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6105 if (ctxt == NULL)
6108 htmlCtxtReset(ctxt);
6110 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6114 inputPush(ctxt, stream);
6115 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6120 * @ctxt: an HTML parser context
6128 * This reuses the existing @ctxt parser context
6133 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6139 if (ctxt == NULL)
6144 htmlCtxtReset(ctxt);
6151 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6157 inputPush(ctxt, stream);
6158 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6163 * @ctxt: an HTML parser context
6170 * This reuses the existing @ctxt parser context
6175 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6183 if (ctxt == NULL)
6186 htmlCtxtReset(ctxt);
6192 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6197 inputPush(ctxt, stream);
6198 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6203 * @ctxt: an HTML parser context
6212 * This reuses the existing @ctxt parser context
6217 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6227 if (ctxt == NULL)
6230 htmlCtxtReset(ctxt);
6236 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6241 inputPush(ctxt, stream);
6242 return (htmlDoRead(ctxt, URL, encoding, options, 1));