1/* Copyright (C) 2021 Free Software Foundation, Inc.
2   Contributed by Oracle.
3
4   This file is part of GNU Binutils.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, 51 Franklin Street - Fifth Floor, Boston,
19   MA 02110-1301, USA.  */
20
21#include "config.h"
22#include <ctype.h>
23
24#include "util.h"
25#include "vec.h"
26#include "DefaultHandler.h"
27#include "SAXParser.h"
28#include "SAXParserFactory.h"
29#include "StringBuilder.h"
30
31/*
32 *  Private implementation of Attributes
33 */
34class AttributesP : public Attributes
35{
36public:
37  AttributesP ();
38  ~AttributesP ();
39  int getLength ();
40  const char *getQName (int index);
41  const char *getValue (int index);
42  int getIndex (const char *qName);
43  const char *getValue (const char *qName);
44  void append (char *qName, char *value);
45
46private:
47  Vector<char*> *names;
48  Vector<char*> *values;
49};
50
51AttributesP::AttributesP ()
52{
53  names = new Vector<char*>;
54  values = new Vector<char*>;
55}
56
57AttributesP::~AttributesP ()
58{
59  Destroy (names);
60  Destroy (values);
61}
62
63int
64AttributesP::getLength ()
65{
66  return names->size ();
67}
68
69const char *
70AttributesP::getQName (int index)
71{
72  if (index < 0 || index >= names->size ())
73    return NULL;
74  return names->fetch (index);
75}
76
77const char *
78AttributesP::getValue (int index)
79{
80  if (index < 0 || index >= values->size ())
81    return NULL;
82  return values->fetch (index);
83}
84
85int
86AttributesP::getIndex (const char *qName)
87{
88  for (int idx = 0; idx < names->size (); idx++)
89    if (strcmp (names->fetch (idx), qName) == 0)
90      return idx;
91  return -1;
92}
93
94const char *
95AttributesP::getValue (const char *qName)
96{
97  for (int idx = 0; idx < names->size (); idx++)
98    if (strcmp (names->fetch (idx), qName) == 0)
99      return values->fetch (idx);
100  return NULL;
101}
102
103void
104AttributesP::append (char *qName, char *value)
105{
106  names->append (qName);
107  values->append (value);
108}
109
110/*
111 *  Implementation of SAXException
112 */
113SAXException::SAXException ()
114{
115  message = strdup ("null");
116}
117
118SAXException::SAXException (const char *_message)
119{
120  if (_message == NULL)
121    message = strdup ("null");
122  else
123    message = strdup (_message);
124}
125
126SAXException::~SAXException ()
127{
128  free (message);
129}
130
131char *
132SAXException::getMessage ()
133{
134  return message;
135}
136
137/*
138 *  SAXParseException
139 */
140SAXParseException::SAXParseException (char *message, int _lineNumber, int _columnNumber)
141: SAXException (message == NULL ? GTXT ("XML parse error") : message)
142{
143  lineNumber = _lineNumber;
144  columnNumber = _columnNumber;
145}
146
147/*
148 *  Private implementation of SAXParser
149 */
150class SAXParserP : public SAXParser
151{
152public:
153  SAXParserP ();
154  ~SAXParserP ();
155  void reset ();
156  void parse (File*, DefaultHandler*);
157
158  bool
159  isNamespaceAware ()
160  {
161    return false;
162  }
163
164  bool
165  isValidating ()
166  {
167    return false;
168  }
169
170private:
171
172  static const int CH_EOF = -1;
173
174  void nextch ();
175  bool isWSpace ();
176  void skipWSpaces ();
177  void scanString (const char *str);
178  char *parseName ();
179  char *parseString ();
180  char *decodeString (char *str);
181  Attributes *parseAttributes ();
182  void parseTag ();
183  void parseDocument ();
184  void parsePart (int idx);
185
186  DefaultHandler *dh;
187  int bufsz;
188  char *buffer;
189  int cntsz;
190  int idx;
191  int curch;
192  int line;
193  int column;
194};
195
196SAXParserP::SAXParserP ()
197{
198  dh = NULL;
199  bufsz = 0x2000;
200  buffer = (char*) malloc (bufsz);
201  cntsz = 0;
202  idx = 0;
203  line = 1;
204  column = 0;
205}
206
207SAXParserP::~SAXParserP ()
208{
209  free (buffer);
210}
211
212void
213SAXParserP::reset ()
214{
215  dh = NULL;
216  bufsz = 8192;
217  buffer = (char*) realloc (buffer, bufsz);
218  cntsz = 0;
219  idx = 0;
220  line = 1;
221  column = 0;
222}
223
224void
225SAXParserP::parse (File *f, DefaultHandler *_dh)
226{
227  if (_dh == NULL)
228    return;
229  dh = _dh;
230  FILE *file = (FILE*) f;
231  int rem = bufsz;
232  cntsz = 0;
233  idx = 0;
234  for (;;)
235    {
236      int n = (int) fread (buffer + cntsz, 1, rem, file);
237      if (ferror (file) || n <= 0)
238	break;
239      cntsz += n;
240      if (feof (file))
241	break;
242      rem -= n;
243      if (rem == 0)
244	{
245	  int oldbufsz = bufsz;
246	  bufsz = bufsz >= 0x100000 ? bufsz + 0x100000 : bufsz * 2;
247	  buffer = (char*) realloc (buffer, bufsz);
248	  rem = bufsz - oldbufsz;
249	}
250    }
251  nextch ();
252  parseDocument ();
253}
254
/* Converts one hexadecimal digit to its numeric value.
   Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns -1 for any other
   character (including '\0').  */
static int
hex (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  return -1;
}
264
265void
266SAXParserP::nextch ()
267{
268  curch = idx >= cntsz ? CH_EOF : buffer[idx++];
269  if (curch == '\n')
270    {
271      line += 1;
272      column = 0;
273    }
274  else
275    column += 1;
276}
277
278bool
279SAXParserP::isWSpace ()
280{
281  return curch == ' ' || curch == '\t' || curch == '\n' || curch == '\r';
282}
283
284void
285SAXParserP::skipWSpaces ()
286{
287  while (isWSpace ())
288    nextch ();
289}
290
291void
292SAXParserP::scanString (const char *str)
293{
294  if (str == NULL || *str == '\0')
295    return;
296  for (;;)
297    {
298      if (curch == CH_EOF)
299	break;
300      else if (curch == *str)
301	{
302	  const char *p = str;
303	  for (;;)
304	    {
305	      p += 1;
306	      nextch ();
307	      if (*p == '\0')
308		return;
309	      if (curch != *p)
310		break;
311	    }
312	}
313      nextch ();
314    }
315}
316
317char *
318SAXParserP::parseName ()
319{
320  StringBuilder *name = new StringBuilder ();
321
322  if ((curch >= 'A' && curch <= 'Z') || (curch >= 'a' && curch <= 'z'))
323    {
324      name->append ((char) curch);
325      nextch ();
326      while (isalnum (curch) != 0 || curch == '_')
327	{
328	  name->append ((char) curch);
329	  nextch ();
330	}
331    }
332
333  char *res = name->toString ();
334  delete name;
335  return res;
336}
337
338/**
339 * Replaces encoded XML characters with original characters
340 * Attention: this method reuses the same string that is passed as the argument
341 * @param str
342 * @return str
343 */
344char *
345SAXParserP::decodeString (char * str)
346{
347  // Check if string has %22% and replace it with double quotes
348  // Also replace all other special combinations.
349  char *from = str;
350  char *to = str;
351  if (strstr (from, "%") || strstr (from, "&"))
352    {
353      int len = strlen (from);
354      for (int i = 0; i < len; i++)
355	{
356	  int nch = from[i];
357	  // Process &...; combinations
358	  if (nch == '&' && i + 3 < len)
359	    {
360	      if (from[i + 2] == 't' && from[i + 3] == ';')
361		{
362		  // check &lt; &gt;
363		  if (from[i + 1] == 'l')
364		    {
365		      nch = '<';
366		      i += 3;
367		    }
368		  else if (from[i + 1] == 'g')
369		    {
370		      nch = '>';
371		      i += 3;
372		    }
373		}
374	      else if (i + 4 < len && from[i + 4] == ';')
375		{
376		  // check &amp;
377		  if (from[i + 1] == 'a' && from[i + 2] == 'm' && from[i + 3] == 'p')
378		    {
379		      nch = '&';
380		      i += 4;
381		    }
382		}
383	      else if ((i + 5 < len) && (from[i + 5] == ';'))
384		{
385		  // check &apos; &quot;
386		  if (from[i + 1] == 'a' && from[i + 2] == 'p'
387		      && from[i + 3] == 'o' && from[i + 4] == 's')
388		    {
389		      nch = '\'';
390		      i += 5;
391		    }
392		  if (from[i + 1] == 'q' && from[i + 2] == 'u' && from[i + 3] == 'o' && from[i + 4] == 't')
393		    {
394		      nch = '"';
395		      i += 5;
396		    }
397		}
398	    }
399	  // Process %XX% combinations
400	  if (nch == '%' && i + 3 < len && from[i + 3] == '%')
401	    {
402	      int ch = hex (from[i + 1]);
403	      if (ch >= 0)
404		{
405		  int ch2 = hex (from[i + 2]);
406		  if (ch2 >= 0)
407		    {
408		      ch = ch * 16 + ch2;
409		      nch = ch;
410		      i += 3;
411		    }
412		}
413	    }
414	  *to++ = (char) nch;
415	}
416      *to = '\0';
417    }
418  return str;
419}
420
421char *
422SAXParserP::parseString ()
423{
424  StringBuilder *str = new StringBuilder ();
425  int quote = '>';
426  if (curch == '"')
427    {
428      quote = curch;
429      nextch ();
430    }
431  for (;;)
432    {
433      if (curch == CH_EOF)
434	break;
435      if (curch == quote)
436	{
437	  nextch ();
438	  break;
439	}
440      str->append ((char) curch);
441      nextch ();
442    }
443
444  char *res = str->toString ();
445  // Decode XML characters
446  res = decodeString (res);
447  delete str;
448  return res;
449}
450
451Attributes *
452SAXParserP::parseAttributes ()
453{
454  AttributesP *attrs = new AttributesP ();
455
456  for (;;)
457    {
458      skipWSpaces ();
459      char *name = parseName ();
460      if (name == NULL || *name == '\0')
461	{
462	  free (name);
463	  break;
464	}
465      skipWSpaces ();
466      if (curch != '=')
467	{
468	  SAXParseException *e = new SAXParseException (NULL, line, column);
469	  dh->error (e);
470	  scanString (">");
471	  free (name);
472	  return attrs;
473	}
474      nextch ();
475      skipWSpaces ();
476      char *value = parseString ();
477      attrs->append (name, value);
478    }
479  return attrs;
480}
481
482void
483SAXParserP::parseTag ()
484{
485  skipWSpaces ();
486  bool empty = false;
487  char *name = parseName ();
488  if (name == NULL || *name == '\0')
489    {
490      SAXParseException *e = new SAXParseException (NULL, line, column);
491      dh->error (e);
492      scanString (">");
493      free (name);
494      return;
495    }
496
497  Attributes *attrs = parseAttributes ();
498  if (curch == '/')
499    {
500      nextch ();
501      empty = true;
502    }
503  if (curch == '>')
504    nextch ();
505  else
506    {
507      empty = false;
508      SAXParseException *e = new SAXParseException (NULL, line, column);
509      dh->error (e);
510      scanString (">");
511    }
512  if (curch == CH_EOF)
513    {
514      free (name);
515      delete attrs;
516      return;
517    }
518  dh->startElement (NULL, NULL, name, attrs);
519  if (empty)
520    {
521      dh->endElement (NULL, NULL, name);
522      free (name);
523      delete attrs;
524      return;
525    }
526
527  StringBuilder *chars = new StringBuilder ();
528  bool wspaces = true;
529  for (;;)
530    {
531      if (curch == CH_EOF)
532	break;
533      else if (curch == '<')
534	{
535	  if (chars->length () > 0)
536	    {
537	      char *str = chars->toString ();
538	      // Decode XML characters
539	      str = decodeString (str);
540	      if (wspaces)
541		dh->ignorableWhitespace (str, 0, chars->length ());
542	      else
543		dh->characters (str, 0, chars->length ());
544	      free (str);
545	      chars->setLength (0);
546	      wspaces = true;
547	    }
548	  nextch ();
549	  if (curch == '/')
550	    {
551	      nextch ();
552	      char *ename = parseName ();
553	      if (ename && *ename != '\0')
554		{
555		  if (strcmp (name, ename) == 0)
556		    {
557		      skipWSpaces ();
558		      if (curch == '>')
559			{
560			  nextch ();
561			  dh->endElement (NULL, NULL, name);
562			  free (ename);
563			  break;
564			}
565		      SAXParseException *e = new SAXParseException (NULL, line, column);
566		      dh->error (e);
567		    }
568		  else
569		    {
570		      SAXParseException *e = new SAXParseException (NULL, line, column);
571		      dh->error (e);
572		    }
573		  scanString (">");
574		}
575	      free (ename);
576	    }
577	  else
578	    parseTag ();
579	}
580      else
581	{
582	  if (!isWSpace ())
583	    wspaces = false;
584	  chars->append ((char) curch);
585	  nextch ();
586	}
587    }
588
589  free (name);
590  delete attrs;
591  delete chars;
592  return;
593}
594
595void
596SAXParserP::parseDocument ()
597{
598  dh->startDocument ();
599  for (;;)
600    {
601      if (curch == CH_EOF)
602	break;
603      if (curch == '<')
604	{
605	  nextch ();
606	  if (curch == '?')
607	    scanString ("?>");
608	  else if (curch == '!')
609	    scanString (">");
610	  else
611	    parseTag ();
612	}
613      else
614	nextch ();
615    }
616  dh->endDocument ();
617}
618
619/*
620 *  Private implementation of SAXParserFactory
621 */
622class SAXParserFactoryP : public SAXParserFactory
623{
624public:
625  SAXParserFactoryP () { }
626  ~SAXParserFactoryP () { }
627  SAXParser *newSAXParser ();
628
629  void
630  setFeature (const char *, bool) { }
631
632  bool
633  getFeature (const char *)
634  {
635    return false;
636  }
637};
638
639SAXParser *
640SAXParserFactoryP::newSAXParser ()
641{
642  return new SAXParserP ();
643}
644
645/*
646 *  SAXParserFactory
647 */
648const char *SAXParserFactory::DEFAULT_PROPERTY_NAME = "javax.xml.parsers.SAXParserFactory";
649
650SAXParserFactory *
651SAXParserFactory::newInstance ()
652{
653  return new SAXParserFactoryP ();
654}
655
656void
657DefaultHandler::dump_startElement (const char *qName, Attributes *attrs)
658{
659  fprintf (stderr, NTXT ("DefaultHandler::startElement qName='%s'\n"), STR (qName));
660  for (int i = 0, sz = attrs ? attrs->getLength () : 0; i < sz; i++)
661    {
662      const char *qn = attrs->getQName (i);
663      const char *vl = attrs->getValue (i);
664      fprintf (stderr, NTXT ("  %d  '%s' = '%s'\n"), i, STR (qn), STR (vl));
665    }
666}
667