1/* Copyright (C) 2021 Free Software Foundation, Inc. 2 Contributed by Oracle. 3 4 This file is part of GNU Binutils. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, 51 Franklin Street - Fifth Floor, Boston, 19 MA 02110-1301, USA. */ 20 21#include "config.h" 22#include <ctype.h> 23 24#include "util.h" 25#include "vec.h" 26#include "DefaultHandler.h" 27#include "SAXParser.h" 28#include "SAXParserFactory.h" 29#include "StringBuilder.h" 30 31/* 32 * Private implementation of Attributes 33 */ 34class AttributesP : public Attributes 35{ 36public: 37 AttributesP (); 38 ~AttributesP (); 39 int getLength (); 40 const char *getQName (int index); 41 const char *getValue (int index); 42 int getIndex (const char *qName); 43 const char *getValue (const char *qName); 44 void append (char *qName, char *value); 45 46private: 47 Vector<char*> *names; 48 Vector<char*> *values; 49}; 50 51AttributesP::AttributesP () 52{ 53 names = new Vector<char*>; 54 values = new Vector<char*>; 55} 56 57AttributesP::~AttributesP () 58{ 59 Destroy (names); 60 Destroy (values); 61} 62 63int 64AttributesP::getLength () 65{ 66 return names->size (); 67} 68 69const char * 70AttributesP::getQName (int index) 71{ 72 if (index < 0 || index >= names->size ()) 73 return NULL; 74 return names->fetch (index); 75} 76 77const char * 78AttributesP::getValue (int index) 79{ 80 if (index < 0 || index >= values->size ()) 81 return NULL; 82 return values->fetch (index); 83} 84 85int 86AttributesP::getIndex (const char *qName) 87{ 88 for (int idx = 0; idx < names->size (); idx++) 89 if (strcmp (names->fetch (idx), qName) == 0) 90 return idx; 91 return -1; 92} 93 94const char * 95AttributesP::getValue (const char *qName) 96{ 97 for (int idx = 0; idx < names->size (); idx++) 98 if (strcmp (names->fetch (idx), qName) == 0) 99 return values->fetch (idx); 100 return NULL; 101} 102 103void 104AttributesP::append (char *qName, char *value) 105{ 106 names->append (qName); 107 values->append (value); 108} 109 110/* 111 * Implementation of SAXException 112 */ 113SAXException::SAXException () 114{ 115 message = strdup ("null"); 116} 117 118SAXException::SAXException (const char *_message) 119{ 120 if (_message == NULL) 121 message = strdup ("null"); 122 else 123 message = strdup (_message); 124} 125 126SAXException::~SAXException () 127{ 128 free (message); 129} 130 131char * 132SAXException::getMessage () 133{ 134 return message; 135} 136 137/* 138 * SAXParseException 139 */ 140SAXParseException::SAXParseException (char *message, int _lineNumber, int _columnNumber) 141: SAXException (message == NULL ? GTXT ("XML parse error") : message) 142{ 143 lineNumber = _lineNumber; 144 columnNumber = _columnNumber; 145} 146 147/* 148 * Private implementation of SAXParser 149 */ 150class SAXParserP : public SAXParser 151{ 152public: 153 SAXParserP (); 154 ~SAXParserP (); 155 void reset (); 156 void parse (File*, DefaultHandler*); 157 158 bool 159 isNamespaceAware () 160 { 161 return false; 162 } 163 164 bool 165 isValidating () 166 { 167 return false; 168 } 169 170private: 171 172 static const int CH_EOF = -1; 173 174 void nextch (); 175 bool isWSpace (); 176 void skipWSpaces (); 177 void scanString (const char *str); 178 char *parseName (); 179 char *parseString (); 180 char *decodeString (char *str); 181 Attributes *parseAttributes (); 182 void parseTag (); 183 void parseDocument (); 184 void parsePart (int idx); 185 186 DefaultHandler *dh; 187 int bufsz; 188 char *buffer; 189 int cntsz; 190 int idx; 191 int curch; 192 int line; 193 int column; 194}; 195 196SAXParserP::SAXParserP () 197{ 198 dh = NULL; 199 bufsz = 0x2000; 200 buffer = (char*) malloc (bufsz); 201 cntsz = 0; 202 idx = 0; 203 line = 1; 204 column = 0; 205} 206 207SAXParserP::~SAXParserP () 208{ 209 free (buffer); 210} 211 212void 213SAXParserP::reset () 214{ 215 dh = NULL; 216 bufsz = 8192; 217 buffer = (char*) realloc (buffer, bufsz); 218 cntsz = 0; 219 idx = 0; 220 line = 1; 221 column = 0; 222} 223 224void 225SAXParserP::parse (File *f, DefaultHandler *_dh) 226{ 227 if (_dh == NULL) 228 return; 229 dh = _dh; 230 FILE *file = (FILE*) f; 231 int rem = bufsz; 232 cntsz = 0; 233 idx = 0; 234 for (;;) 235 { 236 int n = (int) fread (buffer + cntsz, 1, rem, file); 237 if (ferror (file) || n <= 0) 238 break; 239 cntsz += n; 240 if (feof (file)) 241 break; 242 rem -= n; 243 if (rem == 0) 244 { 245 int oldbufsz = bufsz; 246 bufsz = bufsz >= 0x100000 ? bufsz + 0x100000 : bufsz * 2; 247 buffer = (char*) realloc (buffer, bufsz); 248 rem = bufsz - oldbufsz; 249 } 250 } 251 nextch (); 252 parseDocument (); 253} 254 255static int 256hex (char c) 257{ 258 if (c >= '0' && c <= '9') 259 return (c - '0'); 260 else if (c >= 'a' && c <= 'f') 261 return 10 + (c - 'a'); 262 return -1; 263} 264 265void 266SAXParserP::nextch () 267{ 268 curch = idx >= cntsz ? CH_EOF : buffer[idx++]; 269 if (curch == '\n') 270 { 271 line += 1; 272 column = 0; 273 } 274 else 275 column += 1; 276} 277 278bool 279SAXParserP::isWSpace () 280{ 281 return curch == ' ' || curch == '\t' || curch == '\n' || curch == '\r'; 282} 283 284void 285SAXParserP::skipWSpaces () 286{ 287 while (isWSpace ()) 288 nextch (); 289} 290 291void 292SAXParserP::scanString (const char *str) 293{ 294 if (str == NULL || *str == '\0') 295 return; 296 for (;;) 297 { 298 if (curch == CH_EOF) 299 break; 300 else if (curch == *str) 301 { 302 const char *p = str; 303 for (;;) 304 { 305 p += 1; 306 nextch (); 307 if (*p == '\0') 308 return; 309 if (curch != *p) 310 break; 311 } 312 } 313 nextch (); 314 } 315} 316 317char * 318SAXParserP::parseName () 319{ 320 StringBuilder *name = new StringBuilder (); 321 322 if ((curch >= 'A' && curch <= 'Z') || (curch >= 'a' && curch <= 'z')) 323 { 324 name->append ((char) curch); 325 nextch (); 326 while (isalnum (curch) != 0 || curch == '_') 327 { 328 name->append ((char) curch); 329 nextch (); 330 } 331 } 332 333 char *res = name->toString (); 334 delete name; 335 return res; 336} 337 338/** 339 * Replaces encoded XML characters with original characters 340 * Attention: this method reuses the same string that is passed as the argument 341 * @param str 342 * @return str 343 */ 344char * 345SAXParserP::decodeString (char * str) 346{ 347 // Check if string has %22% and replace it with double quotes 348 // Also replace all other special combinations. 349 char *from = str; 350 char *to = str; 351 if (strstr (from, "%") || strstr (from, "&")) 352 { 353 int len = strlen (from); 354 for (int i = 0; i < len; i++) 355 { 356 int nch = from[i]; 357 // Process &...; combinations 358 if (nch == '&' && i + 3 < len) 359 { 360 if (from[i + 2] == 't' && from[i + 3] == ';') 361 { 362 // check < > 363 if (from[i + 1] == 'l') 364 { 365 nch = '<'; 366 i += 3; 367 } 368 else if (from[i + 1] == 'g') 369 { 370 nch = '>'; 371 i += 3; 372 } 373 } 374 else if (i + 4 < len && from[i + 4] == ';') 375 { 376 // check & 377 if (from[i + 1] == 'a' && from[i + 2] == 'm' && from[i + 3] == 'p') 378 { 379 nch = '&'; 380 i += 4; 381 } 382 } 383 else if ((i + 5 < len) && (from[i + 5] == ';')) 384 { 385 // check ' " 386 if (from[i + 1] == 'a' && from[i + 2] == 'p' 387 && from[i + 3] == 'o' && from[i + 4] == 's') 388 { 389 nch = '\''; 390 i += 5; 391 } 392 if (from[i + 1] == 'q' && from[i + 2] == 'u' && from[i + 3] == 'o' && from[i + 4] == 't') 393 { 394 nch = '"'; 395 i += 5; 396 } 397 } 398 } 399 // Process %XX% combinations 400 if (nch == '%' && i + 3 < len && from[i + 3] == '%') 401 { 402 int ch = hex (from[i + 1]); 403 if (ch >= 0) 404 { 405 int ch2 = hex (from[i + 2]); 406 if (ch2 >= 0) 407 { 408 ch = ch * 16 + ch2; 409 nch = ch; 410 i += 3; 411 } 412 } 413 } 414 *to++ = (char) nch; 415 } 416 *to = '\0'; 417 } 418 return str; 419} 420 421char * 422SAXParserP::parseString () 423{ 424 StringBuilder *str = new StringBuilder (); 425 int quote = '>'; 426 if (curch == '"') 427 { 428 quote = curch; 429 nextch (); 430 } 431 for (;;) 432 { 433 if (curch == CH_EOF) 434 break; 435 if (curch == quote) 436 { 437 nextch (); 438 break; 439 } 440 str->append ((char) curch); 441 nextch (); 442 } 443 444 char *res = str->toString (); 445 // Decode XML characters 446 res = decodeString (res); 447 delete str; 448 return res; 449} 450 451Attributes * 452SAXParserP::parseAttributes () 453{ 454 AttributesP *attrs = new AttributesP (); 455 456 for (;;) 457 { 458 skipWSpaces (); 459 char *name = parseName (); 460 if (name == NULL || *name == '\0') 461 { 462 free (name); 463 break; 464 } 465 skipWSpaces (); 466 if (curch != '=') 467 { 468 SAXParseException *e = new SAXParseException (NULL, line, column); 469 dh->error (e); 470 scanString (">"); 471 free (name); 472 return attrs; 473 } 474 nextch (); 475 skipWSpaces (); 476 char *value = parseString (); 477 attrs->append (name, value); 478 } 479 return attrs; 480} 481 482void 483SAXParserP::parseTag () 484{ 485 skipWSpaces (); 486 bool empty = false; 487 char *name = parseName (); 488 if (name == NULL || *name == '\0') 489 { 490 SAXParseException *e = new SAXParseException (NULL, line, column); 491 dh->error (e); 492 scanString (">"); 493 free (name); 494 return; 495 } 496 497 Attributes *attrs = parseAttributes (); 498 if (curch == '/') 499 { 500 nextch (); 501 empty = true; 502 } 503 if (curch == '>') 504 nextch (); 505 else 506 { 507 empty = false; 508 SAXParseException *e = new SAXParseException (NULL, line, column); 509 dh->error (e); 510 scanString (">"); 511 } 512 if (curch == CH_EOF) 513 { 514 free (name); 515 delete attrs; 516 return; 517 } 518 dh->startElement (NULL, NULL, name, attrs); 519 if (empty) 520 { 521 dh->endElement (NULL, NULL, name); 522 free (name); 523 delete attrs; 524 return; 525 } 526 527 StringBuilder *chars = new StringBuilder (); 528 bool wspaces = true; 529 for (;;) 530 { 531 if (curch == CH_EOF) 532 break; 533 else if (curch == '<') 534 { 535 if (chars->length () > 0) 536 { 537 char *str = chars->toString (); 538 // Decode XML characters 539 str = decodeString (str); 540 if (wspaces) 541 dh->ignorableWhitespace (str, 0, chars->length ()); 542 else 543 dh->characters (str, 0, chars->length ()); 544 free (str); 545 chars->setLength (0); 546 wspaces = true; 547 } 548 nextch (); 549 if (curch == '/') 550 { 551 nextch (); 552 char *ename = parseName (); 553 if (ename && *ename != '\0') 554 { 555 if (strcmp (name, ename) == 0) 556 { 557 skipWSpaces (); 558 if (curch == '>') 559 { 560 nextch (); 561 dh->endElement (NULL, NULL, name); 562 free (ename); 563 break; 564 } 565 SAXParseException *e = new SAXParseException (NULL, line, column); 566 dh->error (e); 567 } 568 else 569 { 570 SAXParseException *e = new SAXParseException (NULL, line, column); 571 dh->error (e); 572 } 573 scanString (">"); 574 } 575 free (ename); 576 } 577 else 578 parseTag (); 579 } 580 else 581 { 582 if (!isWSpace ()) 583 wspaces = false; 584 chars->append ((char) curch); 585 nextch (); 586 } 587 } 588 589 free (name); 590 delete attrs; 591 delete chars; 592 return; 593} 594 595void 596SAXParserP::parseDocument () 597{ 598 dh->startDocument (); 599 for (;;) 600 { 601 if (curch == CH_EOF) 602 break; 603 if (curch == '<') 604 { 605 nextch (); 606 if (curch == '?') 607 scanString ("?>"); 608 else if (curch == '!') 609 scanString (">"); 610 else 611 parseTag (); 612 } 613 else 614 nextch (); 615 } 616 dh->endDocument (); 617} 618 619/* 620 * Private implementation of SAXParserFactory 621 */ 622class SAXParserFactoryP : public SAXParserFactory 623{ 624public: 625 SAXParserFactoryP () { } 626 ~SAXParserFactoryP () { } 627 SAXParser *newSAXParser (); 628 629 void 630 setFeature (const char *, bool) { } 631 632 bool 633 getFeature (const char *) 634 { 635 return false; 636 } 637}; 638 639SAXParser * 640SAXParserFactoryP::newSAXParser () 641{ 642 return new SAXParserP (); 643} 644 645/* 646 * SAXParserFactory 647 */ 648const char *SAXParserFactory::DEFAULT_PROPERTY_NAME = "javax.xml.parsers.SAXParserFactory"; 649 650SAXParserFactory * 651SAXParserFactory::newInstance () 652{ 653 return new SAXParserFactoryP (); 654} 655 656void 657DefaultHandler::dump_startElement (const char *qName, Attributes *attrs) 658{ 659 fprintf (stderr, NTXT ("DefaultHandler::startElement qName='%s'\n"), STR (qName)); 660 for (int i = 0, sz = attrs ? attrs->getLength () : 0; i < sz; i++) 661 { 662 const char *qn = attrs->getQName (i); 663 const char *vl = attrs->getValue (i); 664 fprintf (stderr, NTXT (" %d '%s' = '%s'\n"), i, STR (qn), STR (vl)); 665 } 666} 667