1/* 2The contents of this file are subject to the Mozilla Public License 3Version 1.0 (the "License"); you may not use this file except in 4compliance with the License. You may obtain a copy of the License at 5http://www.mozilla.org/MPL/ 6 7Software distributed under the License is distributed on an "AS IS" 8basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 9License for the specific language governing rights and limitations 10under the License. 11 12The Original Code is expat. 13 14The Initial Developer of the Original Code is James Clark. 15Portions created by James Clark are Copyright (C) 1998 16James Clark. All Rights Reserved. 17 18Contributor(s): 19*/ 20 21#include <tcl.h> 22#include "xmlparse.h" 23#include "filemap.h" 24#include "codepage.h" 25 26#include <stdio.h> 27#include <stdlib.h> 28#include <stddef.h> 29#include <string.h> 30#include <fcntl.h> 31 32#ifdef _MSC_VER 33#include <io.h> 34#endif 35 36#ifdef _POSIX_SOURCE 37#include <unistd.h> 38#endif 39 40#ifndef O_BINARY 41#ifdef _O_BINARY 42#define O_BINARY _O_BINARY 43#else 44#define O_BINARY 0 45#endif 46#endif 47 48#ifdef _MSC_VER 49#include <crtdbg.h> 50#endif 51 52#ifdef _DEBUG 53#define READ_SIZE 16 54#else 55#define READ_SIZE (1024*8) 56#endif 57 58#ifdef XML_UNICODE 59#ifndef XML_UNICODE_WCHAR_T 60#error xmlwf requires a 16-bit Unicode-compatible wchar_t 61#endif 62#define T(x) L ## x 63#define ftprintf fwprintf 64#define tfopen _wfopen 65#define fputts fputws 66#define puttc putwc 67#define tcscmp wcscmp 68#define tcscpy wcscpy 69#define tcscat wcscat 70#define tcschr wcschr 71#define tcsrchr wcsrchr 72#define tcslen wcslen 73#define tperror _wperror 74#define topen _wopen 75#define tmain wmain 76#define tremove _wremove 77#else /* not XML_UNICODE */ 78#define T(x) x 79#define ftprintf fprintf 80#define tfopen fopen 81#define fputts fputs 82#define puttc putc 83#define tcscmp strcmp 84#define tcscpy strcpy 85#define tcscat strcat 86#define tcschr strchr 87#define tcsrchr strrchr 88#define tcslen strlen 89#define tperror perror 90#define topen open 91#define tmain main 92#define tremove remove 93#endif /* not XML_UNICODE */ 94 95static void characterData(void *userData, const XML_Char *s, size_t len) 96{ 97 FILE *fp = userData; 98 for (; len > 0; --len, ++s) { 99 switch (*s) { 100 case T('&'): 101 fputts(T("&"), fp); 102 break; 103 case T('<'): 104 fputts(T("<"), fp); 105 break; 106 case T('>'): 107 fputts(T(">"), fp); 108 break; 109 case T('"'): 110 fputts(T("""), fp); 111 break; 112 case 9: 113 case 10: 114 case 13: 115 ftprintf(fp, T("&#%d;"), *s); 116 break; 117 default: 118 puttc(*s, fp); 119 break; 120 } 121 } 122} 123 124/* Lexicographically comparing UTF-8 encoded attribute values, 125is equivalent to lexicographically comparing based on the character number. */ 126 127static int attcmp(const void *att1, const void *att2) 128{ 129 return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); 130} 131 132static void startElement(void *userData, const XML_Char *name, const XML_Char **atts) 133{ 134 size_t nAtts; 135 const XML_Char **p; 136 FILE *fp = userData; 137 puttc(T('<'), fp); 138 fputts(name, fp); 139 140 p = atts; 141 while (*p) 142 ++p; 143 nAtts = (p - atts) >> 1; 144 if (nAtts > 1) 145 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp); 146 while (*atts) { 147 puttc(T(' '), fp); 148 fputts(*atts++, fp); 149 puttc(T('='), fp); 150 puttc(T('"'), fp); 151 characterData(userData, *atts, tcslen(*atts)); 152 puttc(T('"'), fp); 153 atts++; 154 } 155 puttc(T('>'), fp); 156} 157 158static void endElement(void *userData, const XML_Char *name) 159{ 160 FILE *fp = userData; 161 puttc(T('<'), fp); 162 puttc(T('/'), fp); 163 fputts(name, fp); 164 puttc(T('>'), fp); 165} 166 167static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data) 168{ 169 FILE *fp = userData; 170 puttc(T('<'), fp); 171 puttc(T('?'), fp); 172 fputts(target, fp); 173 puttc(T(' '), fp); 174 fputts(data, fp); 175 puttc(T('?'), fp); 176 puttc(T('>'), fp); 177} 178 179static void defaultCharacterData(XML_Parser parser, const XML_Char *s, size_t len) 180{ 181 XML_DefaultCurrent(parser); 182} 183 184static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) 185{ 186 XML_DefaultCurrent(parser); 187} 188 189static void defaultEndElement(XML_Parser parser, const XML_Char *name) 190{ 191 XML_DefaultCurrent(parser); 192} 193 194static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) 195{ 196 XML_DefaultCurrent(parser); 197} 198 199static void markup(XML_Parser parser, const XML_Char *s, int len) 200{ 201 FILE *fp = XML_GetUserData(parser); 202 for (; len > 0; --len, ++s) 203 puttc(*s, fp); 204} 205 206static 207void metaLocation(XML_Parser parser) 208{ 209 const XML_Char *uri = XML_GetBase(parser); 210 if (uri) 211 ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri); 212 ftprintf(XML_GetUserData(parser), 213 T(" byte=\"%ld\" line=\"%d\" col=\"%d\""), 214 XML_GetCurrentByteIndex(parser), 215 XML_GetCurrentLineNumber(parser), 216 XML_GetCurrentColumnNumber(parser)); 217} 218 219static 220void metaStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) 221{ 222 FILE *fp = XML_GetUserData(parser); 223 ftprintf(fp, T("<starttag name=\"%s\""), name); 224 metaLocation(parser); 225 if (*atts) { 226 fputts(T(">\n"), fp); 227 do { 228 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]); 229 characterData(fp, atts[1], tcslen(atts[1])); 230 fputts(T("\"/>\n"), fp); 231 } while (*(atts += 2)); 232 fputts(T("</starttag>\n"), fp); 233 } 234 else 235 fputts(T("/>\n"), fp); 236} 237 238static 239void metaEndElement(XML_Parser parser, const XML_Char *name) 240{ 241 FILE *fp = XML_GetUserData(parser); 242 ftprintf(fp, T("<endtag name=\"%s\""), name); 243 metaLocation(parser); 244 fputts(T("/>\n"), fp); 245} 246 247static 248void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) 249{ 250 FILE *fp = XML_GetUserData(parser); 251 ftprintf(fp, T("<pi target=\"%s\" data=\""), target); 252 characterData(fp, data, tcslen(data)); 253 puttc(T('"'), fp); 254 metaLocation(parser); 255 fputts(T("/>\n"), fp); 256} 257 258static 259void metaCharacterData(XML_Parser parser, const XML_Char *s, size_t len) 260{ 261 FILE *fp = XML_GetUserData(parser); 262 fputts(T("<chars str=\""), fp); 263 characterData(fp, s, len); 264 puttc(T('"'), fp); 265 metaLocation(parser); 266 fputts(T("/>\n"), fp); 267} 268 269static 270void metaUnparsedEntityDecl(XML_Parser parser, 271 const XML_Char *entityName, 272 const XML_Char *base, 273 const XML_Char *systemId, 274 const XML_Char *publicId, 275 const XML_Char *notationName) 276{ 277 FILE *fp = XML_GetUserData(parser); 278 ftprintf(fp, T("<entity name=\"%s\""), entityName); 279 if (publicId) 280 ftprintf(fp, T(" public=\"%s\""), publicId); 281 fputts(T(" system=\""), fp); 282 characterData(fp, systemId, tcslen(systemId)); 283 puttc(T('"'), fp); 284 ftprintf(fp, T(" notation=\"%s\""), notationName); 285 metaLocation(parser); 286 fputts(T("/>\n"), fp); 287} 288 289static 290void metaNotationDecl(XML_Parser parser, 291 const XML_Char *notationName, 292 const XML_Char *base, 293 const XML_Char *systemId, 294 const XML_Char *publicId) 295{ 296 FILE *fp = XML_GetUserData(parser); 297 ftprintf(fp, T("<notation name=\"%s\""), notationName); 298 if (publicId) 299 ftprintf(fp, T(" public=\"%s\""), publicId); 300 if (systemId) { 301 fputts(T(" system=\""), fp); 302 characterData(fp, systemId, tcslen(systemId)); 303 puttc(T('"'), fp); 304 } 305 metaLocation(parser); 306 fputts(T("/>\n"), fp); 307} 308 309typedef struct { 310 XML_Parser parser; 311 int *retPtr; 312} PROCESS_ARGS; 313 314static 315void reportError(XML_Parser parser, const XML_Char *filename) 316{ 317 int code = XML_GetErrorCode(parser); 318 const XML_Char *message = XML_ErrorString(code); 319 if (message) 320 ftprintf(stdout, T("%s:%d:%d: %s\n"), 321 filename, 322 XML_GetErrorLineNumber(parser), 323 XML_GetErrorColumnNumber(parser), 324 message); 325 else 326 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code); 327} 328 329static 330void processFile(const void *data, size_t size, const XML_Char *filename, void *args) 331{ 332 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 333 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 334 if (!XML_Parse(parser, data, size, 1)) { 335 reportError(parser, filename); 336 *retPtr = 0; 337 } 338 else 339 *retPtr = 1; 340} 341 342static 343int isAsciiLetter(XML_Char c) 344{ 345 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 346} 347 348static 349const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree) 350{ 351 XML_Char *s; 352 *toFree = 0; 353 if (!base 354 || *systemId == T('/') 355#ifdef WIN32 356 || *systemId == T('\\') 357 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 358#endif 359 ) 360 return systemId; 361 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char)); 362 if (!*toFree) 363 return systemId; 364 tcscpy(*toFree, base); 365 s = *toFree; 366 if (tcsrchr(s, T('/'))) 367 s = tcsrchr(s, T('/')) + 1; 368#ifdef WIN32 369 if (tcsrchr(s, T('\\'))) 370 s = tcsrchr(s, T('\\')) + 1; 371#endif 372 tcscpy(s, systemId); 373 return *toFree; 374} 375 376static 377int externalEntityRefFilemap(XML_Parser parser, 378 const XML_Char *openEntityNames, 379 const XML_Char *base, 380 const XML_Char *systemId, 381 const XML_Char *publicId) 382{ 383 int result; 384 XML_Char *s; 385 const XML_Char *filename; 386 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, openEntityNames, 0); 387 PROCESS_ARGS args; 388 args.retPtr = &result; 389 args.parser = entParser; 390 filename = resolveSystemId(base, systemId, &s); 391 XML_SetBase(entParser, filename); 392 if (!filemap(filename, processFile, &args)) 393 result = 0; 394 free(s); 395 XML_ParserFree(entParser); 396 return result; 397} 398 399static 400int processStream(const XML_Char *filename, XML_Parser parser) 401{ 402 int fd = topen(filename, O_BINARY|O_RDONLY); 403 if (fd < 0) { 404 tperror(filename); 405 return 0; 406 } 407 for (;;) { 408 size_t nread; 409 char *buf = XML_GetBuffer(parser, READ_SIZE); 410 if (!buf) { 411 close(fd); 412 ftprintf(stderr, T("%s: out of memory\n"), filename); 413 return 0; 414 } 415 nread = read(fd, buf, READ_SIZE); 416 if (nread < 0) { 417 tperror(filename); 418 close(fd); 419 return 0; 420 } 421 if (!XML_ParseBuffer(parser, nread, nread == 0)) { 422 reportError(parser, filename); 423 close(fd); 424 return 0; 425 } 426 if (nread == 0) { 427 close(fd); 428 break;; 429 } 430 } 431 return 1; 432} 433 434static 435int externalEntityRefStream(XML_Parser parser, 436 const XML_Char *openEntityNames, 437 const XML_Char *base, 438 const XML_Char *systemId, 439 const XML_Char *publicId) 440{ 441 XML_Char *s; 442 const XML_Char *filename; 443 int ret; 444 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, openEntityNames, 0); 445 filename = resolveSystemId(base, systemId, &s); 446 XML_SetBase(entParser, filename); 447 ret = processStream(filename, entParser); 448 free(s); 449 XML_ParserFree(entParser); 450 return ret; 451} 452 453static 454int unknownEncodingConvert(void *data, const char *p) 455{ 456 return codepageConvert(*(int *)data, p); 457} 458 459static 460int unknownEncoding(void *userData, 461 const XML_Char *name, 462 XML_Encoding *info) 463{ 464 int cp; 465 static const XML_Char prefixL[] = T("windows-"); 466 static const XML_Char prefixU[] = T("WINDOWS-"); 467 int i; 468 469 for (i = 0; prefixU[i]; i++) 470 if (name[i] != prefixU[i] && name[i] != prefixL[i]) 471 return 0; 472 473 cp = 0; 474 for (; name[i]; i++) { 475 static const XML_Char digits[] = T("0123456789"); 476 const XML_Char *s = tcschr(digits, name[i]); 477 if (!s) 478 return 0; 479 cp *= 10; 480 cp += s - digits; 481 if (cp >= 0x10000) 482 return 0; 483 } 484 if (!codepageMap(cp, info->map)) 485 return 0; 486 info->convert = unknownEncodingConvert; 487 /* We could just cast the code page integer to a void *, 488 and avoid the use of release. */ 489 info->release = free; 490 info->data = malloc(sizeof(int)); 491 if (!info->data) 492 return 0; 493 *(int *)info->data = cp; 494 return 1; 495} 496 497static 498void usage(const XML_Char *prog) 499{ 500 ftprintf(stderr, T("usage: %s [-r] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog); 501 exit(1); 502} 503 504int tmain(int argc, XML_Char **argv) 505{ 506 int i; 507 const XML_Char *outputDir = 0; 508 const XML_Char *encoding = 0; 509 int useFilemap = 1; 510 int processExternalEntities = 0; 511 int windowsCodePages = 0; 512 int outputType = 0; 513 514#ifdef _MSC_VER 515 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); 516#endif 517 518 i = 1; 519 while (i < argc && argv[i][0] == T('-')) { 520 int j; 521 if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { 522 i++; 523 break; 524 } 525 j = 1; 526 if (argv[i][j] == T('r')) { 527 useFilemap = 0; 528 j++; 529 } 530 if (argv[i][j] == T('x')) { 531 processExternalEntities = 1; 532 j++; 533 } 534 if (argv[i][j] == T('w')) { 535 windowsCodePages = 1; 536 j++; 537 } 538 if (argv[i][j] == T('m')) { 539 outputType = 'm'; 540 j++; 541 } 542 if (argv[i][j] == T('c')) { 543 outputType = 'c'; 544 j++; 545 } 546 if (argv[i][j] == T('d')) { 547 if (argv[i][j + 1] == T('\0')) { 548 if (++i == argc) 549 usage(argv[0]); 550 outputDir = argv[i]; 551 } 552 else 553 outputDir = argv[i] + j + 1; 554 i++; 555 } 556 else if (argv[i][j] == T('e')) { 557 if (argv[i][j + 1] == T('\0')) { 558 if (++i == argc) 559 usage(argv[0]); 560 encoding = argv[i]; 561 } 562 else 563 encoding = argv[i] + j + 1; 564 i++; 565 } 566 else if (argv[i][j] == T('\0') && j > 1) 567 i++; 568 else 569 usage(argv[0]); 570 } 571 if (i == argc) 572 usage(argv[0]); 573 for (; i < argc; i++) { 574 FILE *fp = 0; 575 XML_Char *outName = 0; 576 int result; 577 XML_Parser parser = XML_ParserCreate(encoding); 578 if (outputDir) { 579 const XML_Char *file = argv[i]; 580 if (tcsrchr(file, T('/'))) 581 file = tcsrchr(file, T('/')) + 1; 582#ifdef WIN32 583 if (tcsrchr(file, T('\\'))) 584 file = tcsrchr(file, T('\\')) + 1; 585#endif 586 outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char)); 587 tcscpy(outName, outputDir); 588 tcscat(outName, T("/")); 589 tcscat(outName, file); 590 fp = tfopen(outName, T("wb")); 591 if (!fp) { 592 tperror(outName); 593 exit(1); 594 } 595#ifdef XML_UNICODE 596 puttc(0xFEFF, fp); 597#endif 598 XML_SetUserData(parser, fp); 599 switch (outputType) { 600 case 'm': 601 XML_UseParserAsHandlerArg(parser); 602 fputts(T("<document>\n"), fp); 603 XML_SetElementHandler(parser, metaStartElement, metaEndElement); 604 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); 605 XML_SetCharacterDataHandler(parser, metaCharacterData); 606 XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl); 607 XML_SetNotationDeclHandler(parser, metaNotationDecl); 608 break; 609 case 'c': 610 XML_UseParserAsHandlerArg(parser); 611 XML_SetDefaultHandler(parser, markup); 612 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement); 613 XML_SetCharacterDataHandler(parser, defaultCharacterData); 614 XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction); 615 break; 616 default: 617 XML_SetElementHandler(parser, startElement, endElement); 618 XML_SetCharacterDataHandler(parser, characterData); 619 XML_SetProcessingInstructionHandler(parser, processingInstruction); 620 break; 621 } 622 } 623 if (windowsCodePages) 624 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); 625 if (!XML_SetBase(parser, argv[i])) { 626 ftprintf(stderr, T("%s: out of memory"), argv[0]); 627 exit(1); 628 } 629 if (processExternalEntities) 630 XML_SetExternalEntityRefHandler(parser, 631 useFilemap 632 ? externalEntityRefFilemap 633 : externalEntityRefStream); 634 if (useFilemap) { 635 PROCESS_ARGS args; 636 args.retPtr = &result; 637 args.parser = parser; 638 if (!filemap(argv[i], processFile, &args)) 639 result = 0; 640 } 641 else 642 result = processStream(argv[i], parser); 643 if (outputDir) { 644 if (outputType == 'm') 645 fputts(T("</document>\n"), fp); 646 fclose(fp); 647 if (!result) 648 tremove(outName); 649 free(outName); 650 } 651 XML_ParserFree(parser); 652 } 653 return 0; 654} 655