1/*
2 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.internal.util.xml.impl;
27
28import java.io.IOException;
29import java.io.InputStream;
30import java.io.InputStreamReader;
31import java.io.Reader;
32import java.io.UnsupportedEncodingException;
33import java.util.HashMap;
34import java.util.Map;
35import jdk.internal.org.xml.sax.InputSource;
36import jdk.internal.org.xml.sax.SAXException;
37
38/**
39 * XML non-validating parser engine.
40 */
41public abstract class Parser {
42
43    public static final String FAULT = "";
44    protected static final int BUFFSIZE_READER = 512;
45    protected static final int BUFFSIZE_PARSER = 128;
46    /**
47     * The end of stream character.
48     */
49    public static final char EOS = 0xffff;
50    private Pair mNoNS; // there is no namespace
51    private Pair mXml;  // the xml namespace
52    private Map<String, Input> mEnt;  // the entities look up table
53    private Map<String, Input> mPEnt; // the parmeter entities look up table
54    protected boolean mIsSAlone;     // xml decl standalone flag
55    protected boolean mIsSAloneSet;  // standalone is explicitely set
56    protected boolean mIsNSAware;    // if true - namespace aware mode
57    protected int mPh;  // current phase of document processing
58    protected static final int PH_BEFORE_DOC = -1;  // before parsing
59    protected static final int PH_DOC_START = 0;   // document start
60    protected static final int PH_MISC_DTD = 1;   // misc before DTD
61    protected static final int PH_DTD = 2;   // DTD
62    protected static final int PH_DTD_MISC = 3;   // misc after DTD
63    protected static final int PH_DOCELM = 4;   // document's element
64    protected static final int PH_DOCELM_MISC = 5;   // misc after element
65    protected static final int PH_AFTER_DOC = 6;   // after parsing
66    protected int mEvt;  // current event type
67    protected static final int EV_NULL = 0;   // unknown
68    protected static final int EV_ELM = 1;   // empty element
69    protected static final int EV_ELMS = 2;   // start element
70    protected static final int EV_ELME = 3;   // end element
71    protected static final int EV_TEXT = 4;   // textual content
72    protected static final int EV_WSPC = 5;   // white space content
73    protected static final int EV_PI = 6;   // processing instruction
74    protected static final int EV_CDAT = 7;   // character data
75    protected static final int EV_COMM = 8;   // comment
76    protected static final int EV_DTD = 9;   // document type definition
77    protected static final int EV_ENT = 10;  // skipped entity
78    private char mESt; // built-in entity recognizer state
79    // mESt values:
80    //   0x100   : the initial state
81    //   > 0x100 : unrecognized name
82    //   < 0x100 : replacement character
83    protected char[] mBuff;       // parser buffer
84    protected int mBuffIdx;    // index of the last char
85    protected Pair mPref;       // stack of prefixes
86    protected Pair mElm;        // stack of elements
87    // mAttL.chars - element qname
88    // mAttL.next  - next element
89    // mAttL.list  - list of attributes defined on this element
90    // mAttL.list.chars - attribute qname
91    // mAttL.list.id    - a char representing attribute's type see below
92    // mAttL.list.next  - next attribute defined on the element
93    // mAttL.list.list  - devault value structure or null
94    // mAttL.list.list.chars - "name='value' " chars array for Input
95    //
96    // Attribute type character values:
97    // 'i' - "ID"
98    // 'r' - "IDREF"
99    // 'R' - "IDREFS"
100    // 'n' - "ENTITY"
101    // 'N' - "ENTITIES"
102    // 't' - "NMTOKEN"
103    // 'T' - "NMTOKENS"
104    // 'u' - enumeration type
105    // 'o' - "NOTATION"
106    // 'c' - "CDATA"
107    // see also: bkeyword() and atype()
108    //
109    protected Pair mAttL;       // list of defined attrs by element name
110    protected Input mDoc;        // document entity
111    protected Input mInp;        // stack of entities
112    private char[] mChars;      // reading buffer
113    private int mChLen;      // current capacity
114    private int mChIdx;      // index to the next char
115    protected Attrs mAttrs;      // attributes of the curr. element
116    private String[] mItems;      // attributes array of the curr. element
117    private char mAttrIdx;    // attributes counter/index
118    private String mUnent;  // unresolved entity name
119    private Pair mDltd;   // deleted objects for reuse
120    /**
121     * Default prefixes
122     */
123    private static final char NONS[];
124    private static final char XML[];
125    private static final char XMLNS[];
126
127    static {
128        NONS = new char[1];
129        NONS[0] = (char) 0;
130
131        XML = new char[4];
132        XML[0] = (char) 4;
133        XML[1] = 'x';
134        XML[2] = 'm';
135        XML[3] = 'l';
136
137        XMLNS = new char[6];
138        XMLNS[0] = (char) 6;
139        XMLNS[1] = 'x';
140        XMLNS[2] = 'm';
141        XMLNS[3] = 'l';
142        XMLNS[4] = 'n';
143        XMLNS[5] = 's';
144    }
145    /**
146     * ASCII character type array.
147     *
148     * This array maps an ASCII (7 bit) character to the character type.<br>
149     * Possible character type values are:<br> - ' ' for any kind of white
150     * space character;<br> - 'a' for any lower case alphabetical character
151     * value;<br> - 'A' for any upper case alphabetical character value;<br>
152     * - 'd' for any decimal digit character value;<br> - 'z' for any
153     * character less than ' ' except '\t', '\n', '\r';<br> An ASCII (7 bit)
154     * character which does not fall in any category listed above is mapped to
155     * it self.
156     */
157    private static final byte asctyp[];
158    /**
159     * NMTOKEN character type array.
160     *
161     * This array maps an ASCII (7 bit) character to the character type.<br>
162     * Possible character type values are:<br> - 0 for underscore ('_') or any
163     * lower and upper case alphabetical character value;<br> - 1 for colon
164     * (':') character;<br> - 2 for dash ('-') and dot ('.') or any decimal
165     * digit character value;<br> - 3 for any kind of white space character<br>
166     * An ASCII (7 bit) character which does not fall in any category listed
167     * above is mapped to 0xff.
168     */
169    private static final byte nmttyp[];
170
171    /**
172     * Static constructor.
173     *
174     * Sets up the ASCII character type array which is used by
175     * {@link #asctyp asctyp} method and NMTOKEN character type array.
176     */
177    static {
178        short i = 0;
179
180        asctyp = new byte[0x80];
181        while (i < ' ') {
182            asctyp[i++] = (byte) 'z';
183        }
184        asctyp['\t'] = (byte) ' ';
185        asctyp['\r'] = (byte) ' ';
186        asctyp['\n'] = (byte) ' ';
187        while (i < '0') {
188            asctyp[i] = (byte) i++;
189        }
190        while (i <= '9') {
191            asctyp[i++] = (byte) 'd';
192        }
193        while (i < 'A') {
194            asctyp[i] = (byte) i++;
195        }
196        while (i <= 'Z') {
197            asctyp[i++] = (byte) 'A';
198        }
199        while (i < 'a') {
200            asctyp[i] = (byte) i++;
201        }
202        while (i <= 'z') {
203            asctyp[i++] = (byte) 'a';
204        }
205        while (i < 0x80) {
206            asctyp[i] = (byte) i++;
207        }
208
209        nmttyp = new byte[0x80];
210        for (i = 0; i < '0'; i++) {
211            nmttyp[i] = (byte) 0xff;
212        }
213        while (i <= '9') {
214            nmttyp[i++] = (byte) 2;  // digits
215        }
216        while (i < 'A') {
217            nmttyp[i++] = (byte) 0xff;
218        }
219        // skiped upper case alphabetical character are already 0
220        for (i = '['; i < 'a'; i++) {
221            nmttyp[i] = (byte) 0xff;
222        }
223        // skiped lower case alphabetical character are already 0
224        for (i = '{'; i < 0x80; i++) {
225            nmttyp[i] = (byte) 0xff;
226        }
227        nmttyp['_'] = 0;
228        nmttyp[':'] = 1;
229        nmttyp['.'] = 2;
230        nmttyp['-'] = 2;
231        nmttyp[' '] = 3;
232        nmttyp['\t'] = 3;
233        nmttyp['\r'] = 3;
234        nmttyp['\n'] = 3;
235    }
236
237    /**
238     * Constructor.
239     */
240    protected Parser() {
241        mPh = PH_BEFORE_DOC;  // before parsing
242
243        //              Initialize the parser
244        mBuff = new char[BUFFSIZE_PARSER];
245        mAttrs = new Attrs();
246
247        //              Default namespace
248        mPref = pair(mPref);
249        mPref.name = "";
250        mPref.value = "";
251        mPref.chars = NONS;
252        mNoNS = mPref;  // no namespace
253        //              XML namespace
254        mPref = pair(mPref);
255        mPref.name = "xml";
256        mPref.value = "http://www.w3.org/XML/1998/namespace";
257        mPref.chars = XML;
258        mXml = mPref;  // XML namespace
259    }
260
261    /**
262     * Initializes parser's internals. Note, current input has to be set before
263     * this method is called.
264     */
265    protected void init() {
266        mUnent = null;
267        mElm = null;
268        mPref = mXml;
269        mAttL = null;
270        mPEnt = new HashMap<>();
271        mEnt = new HashMap<>();
272        mDoc = mInp;          // current input is document entity
273        mChars = mInp.chars;    // use document entity buffer
274        mPh = PH_DOC_START;  // the begining of the document
275    }
276
277    /**
278     * Cleans up parser internal resources.
279     */
280    protected void cleanup() {
281        //              Default attributes
282        while (mAttL != null) {
283            while (mAttL.list != null) {
284                if (mAttL.list.list != null) {
285                    del(mAttL.list.list);
286                }
287                mAttL.list = del(mAttL.list);
288            }
289            mAttL = del(mAttL);
290        }
291        //              Element stack
292        while (mElm != null) {
293            mElm = del(mElm);
294        }
295        //              Namespace prefixes
296        while (mPref != mXml) {
297            mPref = del(mPref);
298        }
299        //              Inputs
300        while (mInp != null) {
301            pop();
302        }
303        //              Document reader
304        if ((mDoc != null) && (mDoc.src != null)) {
305            try {
306                mDoc.src.close();
307            } catch (IOException ioe) {
308            }
309        }
310        mPEnt = null;
311        mEnt = null;
312        mDoc = null;
313        mPh = PH_AFTER_DOC;  // before documnet processing
314    }
315
316    /**
317     * Processes a portion of document. This method returns one of EV_*
318     * constants as an identifier of the portion of document have been read.
319     *
320     * @return Identifier of processed document portion.
321     * @exception Exception is parser specific exception form panic method.
322     * @exception IOException
323     */
324    @SuppressWarnings("fallthrough")
325    protected int step() throws Exception {
326        mEvt = EV_NULL;
327        int st = 0;
328        while (mEvt == EV_NULL) {
329            char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
330            switch (st) {
331                case 0:     // all sorts of markup (dispetcher)
332                    if (ch != '<') {
333                        bkch();
334                        mBuffIdx = -1;  // clean parser buffer
335                        st = 1;
336                        break;
337                    }
338                    switch (getch()) {
339                        case '/':  // the end of the element content
340                            mEvt = EV_ELME;
341                            if (mElm == null) {
342                                panic(FAULT);
343                            }
344                            //          Check element's open/close tags balance
345                            mBuffIdx = -1;  // clean parser buffer
346                            bname(mIsNSAware);
347                            char[] chars = mElm.chars;
348                            if (chars.length == (mBuffIdx + 1)) {
349                                for (char i = 1; i <= mBuffIdx; i += 1) {
350                                    if (chars[i] != mBuff[i]) {
351                                        panic(FAULT);
352                                    }
353                                }
354                            } else {
355                                panic(FAULT);
356                            }
357                            //          Skip white spaces before '>'
358                            if (wsskip() != '>') {
359                                panic(FAULT);
360                            }
361                            getch();  // read '>'
362                            break;
363
364                        case '!':  // a comment or a CDATA
365                            ch = getch();
366                            bkch();
367                            switch (ch) {
368                                case '-':  // must be a comment
369                                    mEvt = EV_COMM;
370                                    comm();
371                                    break;
372
373                                case '[':  // must be a CDATA section
374                                    mEvt = EV_CDAT;
375                                    cdat();
376                                    break;
377
378                                default:   // must be 'DOCTYPE'
379                                    mEvt = EV_DTD;
380                                    dtd();
381                                    break;
382                            }
383                            break;
384
385                        case '?':  // processing instruction
386                            mEvt = EV_PI;
387                            pi();
388                            break;
389
390                        default:  // must be the first char of an xml name
391                            bkch();
392                            //          Read an element name and put it on top of the
393                            //          element stack
394                            mElm = pair(mElm);  // add new element to the stack
395                            mElm.chars = qname(mIsNSAware);
396                            mElm.name = mElm.local();
397                            mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
398                            mElm.num = 0;     // namespace counter
399                            //          Find the list of defined attributs of the current
400                            //          element
401                            Pair elm = find(mAttL, mElm.chars);
402                            mElm.list = (elm != null) ? elm.list : null;
403                            //          Read attributes till the end of the element tag
404                            mAttrIdx = 0;
405                            Pair att = pair(null);
406                            att.num = 0;  // clear attribute's flags
407                            attr(att);     // get all attributes inc. defaults
408                            del(att);
409                            mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
410                            //          Skip white spaces before '>'
411                            switch (wsskip()) {
412                                case '>':
413                                    getch();  // read '>'
414                                    mEvt = EV_ELMS;
415                                    break;
416
417                                case '/':
418                                    getch();  // read '/'
419                                    if (getch() != '>') // read '>'
420                                    {
421                                        panic(FAULT);
422                                    }
423                                    mEvt = EV_ELM;
424                                    break;
425
426                                default:
427                                    panic(FAULT);
428                            }
429                            break;
430                    }
431                    break;
432
433                case 1:     // read white space
434                    switch (ch) {
435                        case ' ':
436                        case '\t':
437                        case '\n':
438                            bappend(ch);
439                            break;
440
441                        case '\r':              // EOL processing [#2.11]
442                            if (getch() != '\n') {
443                                bkch();
444                            }
445                            bappend('\n');
446                            break;
447
448                        case '<':
449                            mEvt = EV_WSPC;
450                            bkch();
451                            bflash_ws();
452                            break;
453
454                        default:
455                            bkch();
456                            st = 2;
457                            break;
458                    }
459                    break;
460
461                case 2:     // read the text content of the element
462                    switch (ch) {
463                        case '&':
464                            if (mUnent == null) {
465                                //              There was no unresolved entity on previous step.
466                                if ((mUnent = ent('x')) != null) {
467                                    mEvt = EV_TEXT;
468                                    bkch();      // move back to ';' after entity name
469                                    setch('&');  // parser must be back on next step
470                                    bflash();
471                                }
472                            } else {
473                                //              There was unresolved entity on previous step.
474                                mEvt = EV_ENT;
475                                skippedEnt(mUnent);
476                                mUnent = null;
477                            }
478                            break;
479
480                        case '<':
481                            mEvt = EV_TEXT;
482                            bkch();
483                            bflash();
484                            break;
485
486                        case '\r':  // EOL processing [#2.11]
487                            if (getch() != '\n') {
488                                bkch();
489                            }
490                            bappend('\n');
491                            break;
492
493                        case EOS:
494                            panic(FAULT);
495
496                        default:
497                            bappend(ch);
498                            break;
499                    }
500                    break;
501
502                default:
503                    panic(FAULT);
504            }
505        }
506
507        return mEvt;
508    }
509
510    /**
511     * Parses the document type declaration.
512     *
513     * @exception Exception is parser specific exception form panic method.
514     * @exception IOException
515     */
516    private void dtd() throws Exception {
517        char ch;
518        String str = null;
519        String name = null;
520        Pair psid = null;
521        // read 'DOCTYPE'
522        if ("DOCTYPE".equals(name(false)) != true) {
523            panic(FAULT);
524        }
525        mPh = PH_DTD;  // DTD
526        for (short st = 0; st >= 0;) {
527            ch = getch();
528            switch (st) {
529                case 0:     // read the document type name
530                    if (chtyp(ch) != ' ') {
531                        bkch();
532                        name = name(mIsNSAware);
533                        wsskip();
534                        st = 1;  // read 'PUPLIC' or 'SYSTEM'
535                    }
536                    break;
537
538                case 1:     // read 'PUPLIC' or 'SYSTEM'
539                    switch (chtyp(ch)) {
540                        case 'A':
541                            bkch();
542                            psid = pubsys(' ');
543                            st = 2;  // skip spaces before internal subset
544                            docType(name, psid.name, psid.value);
545                            break;
546
547                        case '[':
548                            bkch();
549                            st = 2;    // skip spaces before internal subset
550                            docType(name, null, null);
551                            break;
552
553                        case '>':
554                            bkch();
555                            st = 3;    // skip spaces after internal subset
556                            docType(name, null, null);
557                            break;
558
559                        default:
560                            panic(FAULT);
561                    }
562                    break;
563
564                case 2:     // skip spaces before internal subset
565                    switch (chtyp(ch)) {
566                        case '[':
567                            //          Process internal subset
568                            dtdsub();
569                            st = 3;  // skip spaces after internal subset
570                            break;
571
572                        case '>':
573                            //          There is no internal subset
574                            bkch();
575                            st = 3;  // skip spaces after internal subset
576                            break;
577
578                        case ' ':
579                            // skip white spaces
580                            break;
581
582                        default:
583                            panic(FAULT);
584                    }
585                    break;
586
587                case 3:     // skip spaces after internal subset
588                    switch (chtyp(ch)) {
589                        case '>':
590                            if (psid != null) {
591                                //              Report the DTD external subset
592                                InputSource is = resolveEnt(name, psid.name, psid.value);
593                                if (is != null) {
594                                    if (mIsSAlone == false) {
595                                        //              Set the end of DTD external subset char
596                                        bkch();
597                                        setch(']');
598                                        //              Set the DTD external subset InputSource
599                                        push(new Input(BUFFSIZE_READER));
600                                        setinp(is);
601                                        mInp.pubid = psid.name;
602                                        mInp.sysid = psid.value;
603                                        //              Parse the DTD external subset
604                                        dtdsub();
605                                    } else {
606                                        //              Unresolved DTD external subset
607                                        skippedEnt("[dtd]");
608                                        //              Release reader and stream
609                                        if (is.getCharacterStream() != null) {
610                                            try {
611                                                is.getCharacterStream().close();
612                                            } catch (IOException ioe) {
613                                            }
614                                        }
615                                        if (is.getByteStream() != null) {
616                                            try {
617                                                is.getByteStream().close();
618                                            } catch (IOException ioe) {
619                                            }
620                                        }
621                                    }
622                                } else {
623                                    //          Unresolved DTD external subset
624                                    skippedEnt("[dtd]");
625                                }
626                                del(psid);
627                            }
628                            st = -1;  // end of DTD
629                            break;
630
631                        case ' ':
632                            // skip white spaces
633                            break;
634
635                        default:
636                            panic(FAULT);
637                    }
638                    break;
639
640                default:
641                    panic(FAULT);
642            }
643        }
644    }
645
646    /**
647     * Parses the document type declaration subset.
648     *
649     * @exception Exception is parser specific exception form panic method.
650     * @exception IOException
651     */
652    private void dtdsub() throws Exception {
653        char ch;
654        for (short st = 0; st >= 0;) {
655            ch = getch();
656            switch (st) {
657                case 0:     // skip white spaces before a declaration
658                    switch (chtyp(ch)) {
659                        case '<':
660                            ch = getch();
661                            switch (ch) {
662                                case '?':
663                                    pi();
664                                    break;
665
666                                case '!':
667                                    ch = getch();
668                                    bkch();
669                                    if (ch == '-') {
670                                        comm();
671                                        break;
672                                    }
673                                    //          A markup or an entity declaration
674                                    bntok();
675                                    switch (bkeyword()) {
676                                        case 'n':
677                                            dtdent();
678                                            break;
679
680                                        case 'a':
681                                            dtdattl();    // parse attributes declaration
682                                            break;
683
684                                        case 'e':
685                                            dtdelm();     // parse element declaration
686                                            break;
687
688                                        case 'o':
689                                            dtdnot();     // parse notation declaration
690                                            break;
691
692                                        default:
693                                            panic(FAULT); // unsupported markup declaration
694                                            break;
695                                    }
696                                    st = 1;  // read the end of declaration
697                                    break;
698
699                                default:
700                                    panic(FAULT);
701                                    break;
702                            }
703                            break;
704
705                        case '%':
706                            //          A parameter entity reference
707                            pent(' ');
708                            break;
709
710                        case ']':
711                            //          End of DTD subset
712                            st = -1;
713                            break;
714
715                        case ' ':
716                            //          Skip white spaces
717                            break;
718
719                        case 'Z':
720                            //          End of stream
721                            if (getch() != ']') {
722                                panic(FAULT);
723                            }
724                            st = -1;
725                            break;
726
727                        default:
728                            panic(FAULT);
729                    }
730                    break;
731
732                case 1:     // read the end of declaration
733                    switch (ch) {
734                        case '>':   // there is no notation
735                            st = 0; // skip white spaces before a declaration
736                            break;
737
738                        case ' ':
739                        case '\n':
740                        case '\r':
741                        case '\t':
742                            //          Skip white spaces
743                            break;
744
745                        default:
746                            panic(FAULT);
747                            break;
748                    }
749                    break;
750
751                default:
752                    panic(FAULT);
753            }
754        }
755    }
756
757    /**
758     * Parses an entity declaration. This method fills the general (
759     * <code>mEnt</code>) and parameter
760     * (
761     * <code>mPEnt</code>) entity look up table.
762     *
763     * @exception Exception is parser specific exception form panic method.
764     * @exception IOException
765     */
766    @SuppressWarnings("fallthrough")
767    private void dtdent() throws Exception {
768        String str = null;
769        char[] val = null;
770        Input inp = null;
771        Pair ids = null;
772        char ch;
773        for (short st = 0; st >= 0;) {
774            ch = getch();
775            switch (st) {
776                case 0:     // skip white spaces before entity name
777                    switch (chtyp(ch)) {
778                        case ' ':
779                            //          Skip white spaces
780                            break;
781
782                        case '%':
783                            //          Parameter entity or parameter entity declaration.
784                            ch = getch();
785                            bkch();
786                            if (chtyp(ch) == ' ') {
787                                //              Parameter entity declaration.
788                                wsskip();
789                                str = name(false);
790                                switch (chtyp(wsskip())) {
791                                    case 'A':
792                                        //              Read the external identifier
793                                        ids = pubsys(' ');
794                                        if (wsskip() == '>') {
795                                            //          External parsed entity
796                                            if (mPEnt.containsKey(str) == false) {      // [#4.2]
797                                                inp = new Input();
798                                                inp.pubid = ids.name;
799                                                inp.sysid = ids.value;
800                                                mPEnt.put(str, inp);
801                                            }
802                                        } else {
803                                            panic(FAULT);
804                                        }
805                                        del(ids);
806                                        st = -1;  // the end of declaration
807                                        break;
808
809                                    case '\"':
810                                    case '\'':
811                                        //              Read the parameter entity value
812                                        bqstr('d');
813                                        //              Create the parameter entity value
814                                        val = new char[mBuffIdx + 1];
815                                        System.arraycopy(mBuff, 1, val, 1, val.length - 1);
816                                        //              Add surrounding spaces [#4.4.8]
817                                        val[0] = ' ';
818                                        //              Add the entity to the entity look up table
819                                        if (mPEnt.containsKey(str) == false) {  // [#4.2]
820                                            inp = new Input(val);
821                                            inp.pubid = mInp.pubid;
822                                            inp.sysid = mInp.sysid;
823                                            inp.xmlenc = mInp.xmlenc;
824                                            inp.xmlver = mInp.xmlver;
825                                            mPEnt.put(str, inp);
826                                        }
827                                        st = -1;  // the end of declaration
828                                        break;
829
830                                    default:
831                                        panic(FAULT);
832                                        break;
833                                }
834                            } else {
835                                //              Parameter entity reference.
836                                pent(' ');
837                            }
838                            break;
839
840                        default:
841                            bkch();
842                            str = name(false);
843                            st = 1;  // read entity declaration value
844                            break;
845                    }
846                    break;
847
848                case 1:     // read entity declaration value
849                    switch (chtyp(ch)) {
850                        case '\"':  // internal entity
851                        case '\'':
852                            bkch();
853                            bqstr('d');  // read a string into the buffer
854                            if (mEnt.get(str) == null) {
855                                //              Create general entity value
856                                val = new char[mBuffIdx];
857                                System.arraycopy(mBuff, 1, val, 0, val.length);
858                                //              Add the entity to the entity look up table
859                                if (mEnt.containsKey(str) == false) {   // [#4.2]
860                                    inp = new Input(val);
861                                    inp.pubid = mInp.pubid;
862                                    inp.sysid = mInp.sysid;
863                                    inp.xmlenc = mInp.xmlenc;
864                                    inp.xmlver = mInp.xmlver;
865                                    mEnt.put(str, inp);
866                                }
867                            }
868                            st = -1;  // the end of declaration
869                            break;
870
871                        case 'A':  // external entity
872                            bkch();
873                            ids = pubsys(' ');
874                            switch (wsskip()) {
875                                case '>':  // external parsed entity
876                                    if (mEnt.containsKey(str) == false) {  // [#4.2]
877                                        inp = new Input();
878                                        inp.pubid = ids.name;
879                                        inp.sysid = ids.value;
880                                        mEnt.put(str, inp);
881                                    }
882                                    break;
883
884                                case 'N':  // external general unparsed entity
885                                    if ("NDATA".equals(name(false)) == true) {
886                                        wsskip();
887                                        unparsedEntDecl(str, ids.name, ids.value, name(false));
888                                        break;
889                                    }
890                                default:
891                                    panic(FAULT);
892                                    break;
893                            }
894                            del(ids);
895                            st = -1;  // the end of declaration
896                            break;
897
898                        case ' ':
899                            //          Skip white spaces
900                            break;
901
902                        default:
903                            panic(FAULT);
904                            break;
905                    }
906                    break;
907
908                default:
909                    panic(FAULT);
910            }
911        }
912    }
913
914    /**
915     * Parses an element declaration.
916     *
917     * This method parses the declaration up to the closing angle bracket.
918     *
919     * @exception Exception is parser specific exception form panic method.
920     * @exception IOException
921     */
922    @SuppressWarnings("fallthrough")
923    private void dtdelm() throws Exception {
924        //              This is stub implementation which skips an element
925        //              declaration.
926        wsskip();
927        name(mIsNSAware);
928
929        char ch;
930        while (true) {
931            ch = getch();
932            switch (ch) {
933                case '>':
934                    bkch();
935                    return;
936
937                case EOS:
938                    panic(FAULT);
939
940                default:
941                    break;
942            }
943        }
944    }
945
946    /**
947     * Parses an attribute list declaration.
948     *
949     * This method parses the declaration up to the closing angle bracket.
950     *
951     * @exception Exception is parser specific exception form panic method.
952     * @exception IOException
953     */
954    private void dtdattl() throws Exception {
955        char elmqn[] = null;
956        Pair elm = null;
957        char ch;
958        for (short st = 0; st >= 0;) {
959            ch = getch();
960            switch (st) {
961                case 0:     // read the element name
962                    switch (chtyp(ch)) {
963                        case 'a':
964                        case 'A':
965                        case '_':
966                        case 'X':
967                        case ':':
968                            bkch();
969                            //          Get the element from the list or add a new one.
970                            elmqn = qname(mIsNSAware);
971                            elm = find(mAttL, elmqn);
972                            if (elm == null) {
973                                elm = pair(mAttL);
974                                elm.chars = elmqn;
975                                mAttL = elm;
976                            }
977                            st = 1;  // read an attribute declaration
978                            break;
979
980                        case ' ':
981                            break;
982
983                        case '%':
984                            pent(' ');
985                            break;
986
987                        default:
988                            panic(FAULT);
989                            break;
990                    }
991                    break;
992
993                case 1:     // read an attribute declaration
994                    switch (chtyp(ch)) {
995                        case 'a':
996                        case 'A':
997                        case '_':
998                        case 'X':
999                        case ':':
1000                            bkch();
1001                            dtdatt(elm);
1002                            if (wsskip() == '>') {
1003                                return;
1004                            }
1005                            break;
1006
1007                        case ' ':
1008                            break;
1009
1010                        case '%':
1011                            pent(' ');
1012                            break;
1013
1014                        default:
1015                            panic(FAULT);
1016                            break;
1017                    }
1018                    break;
1019
1020                default:
1021                    panic(FAULT);
1022                    break;
1023            }
1024        }
1025    }
1026
1027    /**
1028     * Parses an attribute declaration.
1029     *
1030     * The attribute uses the following fields of Pair object: chars - characters
1031     * of qualified name id - the type identifier of the attribute list - a pair
1032     * which holds the default value (chars field)
1033     *
1034     * @param elm An object which represents all defined attributes on an
1035     * element.
1036     * @exception Exception is parser specific exception form panic method.
1037     * @exception IOException
1038     */
1039    @SuppressWarnings("fallthrough")
1040    private void dtdatt(Pair elm) throws Exception {
1041        char attqn[] = null;
1042        Pair att = null;
1043        char ch;
1044        for (short st = 0; st >= 0;) {
1045            ch = getch();
1046            switch (st) {
1047                case 0:     // the attribute name
1048                    switch (chtyp(ch)) {
1049                        case 'a':
1050                        case 'A':
1051                        case '_':
1052                        case 'X':
1053                        case ':':
1054                            bkch();
1055                            //          Get the attribute from the list or add a new one.
1056                            attqn = qname(mIsNSAware);
1057                            att = find(elm.list, attqn);
1058                            if (att == null) {
1059                                //              New attribute declaration
1060                                att = pair(elm.list);
1061                                att.chars = attqn;
1062                                elm.list = att;
1063                            } else {
1064                                //              Do not override the attribute declaration [#3.3]
1065                                att = pair(null);
1066                                att.chars = attqn;
1067                                att.id = 'c';
1068                            }
1069                            wsskip();
1070                            st = 1;
1071                            break;
1072
1073                        case '%':
1074                            pent(' ');
1075                            break;
1076
1077                        case ' ':
1078                            break;
1079
1080                        default:
1081                            panic(FAULT);
1082                            break;
1083                    }
1084                    break;
1085
1086                case 1:     // the attribute type
1087                    switch (chtyp(ch)) {
1088                        case '(':
1089                            att.id = 'u';  // enumeration type
1090                            st = 2;        // read the first element of the list
1091                            break;
1092
1093                        case '%':
1094                            pent(' ');
1095                            break;
1096
1097                        case ' ':
1098                            break;
1099
1100                        default:
1101                            bkch();
1102                            bntok();  // read type id
1103                            att.id = bkeyword();
1104                            switch (att.id) {
1105                                case 'o':   // NOTATION
1106                                    if (wsskip() != '(') {
1107                                        panic(FAULT);
1108                                    }
1109                                    ch = getch();
1110                                    st = 2;  // read the first element of the list
1111                                    break;
1112
1113                                case 'i':     // ID
1114                                case 'r':     // IDREF
1115                                case 'R':     // IDREFS
1116                                case 'n':     // ENTITY
1117                                case 'N':     // ENTITIES
1118                                case 't':     // NMTOKEN
1119                                case 'T':     // NMTOKENS
1120                                case 'c':     // CDATA
1121                                    wsskip();
1122                                    st = 4;  // read default declaration
1123                                    break;
1124
1125                                default:
1126                                    panic(FAULT);
1127                                    break;
1128                            }
1129                            break;
1130                    }
1131                    break;
1132
1133                case 2:     // read the first element of the list
1134                    switch (chtyp(ch)) {
1135                        case 'a':
1136                        case 'A':
1137                        case 'd':
1138                        case '.':
1139                        case ':':
1140                        case '-':
1141                        case '_':
1142                        case 'X':
1143                            bkch();
1144                            switch (att.id) {
1145                                case 'u':  // enumeration type
1146                                    bntok();
1147                                    break;
1148
1149                                case 'o':  // NOTATION
1150                                    mBuffIdx = -1;
1151                                    bname(false);
1152                                    break;
1153
1154                                default:
1155                                    panic(FAULT);
1156                                    break;
1157                            }
1158                            wsskip();
1159                            st = 3;  // read next element of the list
1160                            break;
1161
1162                        case '%':
1163                            pent(' ');
1164                            break;
1165
1166                        case ' ':
1167                            break;
1168
1169                        default:
1170                            panic(FAULT);
1171                            break;
1172                    }
1173                    break;
1174
1175                case 3:     // read next element of the list
1176                    switch (ch) {
1177                        case ')':
1178                            wsskip();
1179                            st = 4;  // read default declaration
1180                            break;
1181
1182                        case '|':
1183                            wsskip();
1184                            switch (att.id) {
1185                                case 'u':  // enumeration type
1186                                    bntok();
1187                                    break;
1188
1189                                case 'o':  // NOTATION
1190                                    mBuffIdx = -1;
1191                                    bname(false);
1192                                    break;
1193
1194                                default:
1195                                    panic(FAULT);
1196                                    break;
1197                            }
1198                            wsskip();
1199                            break;
1200
1201                        case '%':
1202                            pent(' ');
1203                            break;
1204
1205                        default:
1206                            panic(FAULT);
1207                            break;
1208                    }
1209                    break;
1210
1211                case 4:     // read default declaration
1212                    switch (ch) {
1213                        case '#':
1214                            bntok();
1215                            switch (bkeyword()) {
1216                                case 'F':  // FIXED
1217                                    switch (wsskip()) {
1218                                        case '\"':
1219                                        case '\'':
1220                                            st = 5;  // read the default value
1221                                            break;
1222
1223                                        case EOS:
1224                                            panic(FAULT);
1225
1226                                        default:
1227                                            st = -1;
1228                                            break;
1229                                    }
1230                                    break;
1231
1232                                case 'Q':  // REQUIRED
1233                                case 'I':  // IMPLIED
1234                                    st = -1;
1235                                    break;
1236
1237                                default:
1238                                    panic(FAULT);
1239                                    break;
1240                            }
1241                            break;
1242
1243                        case '\"':
1244                        case '\'':
1245                            bkch();
1246                            st = 5;  // read the default value
1247                            break;
1248
1249                        case ' ':
1250                        case '\n':
1251                        case '\r':
1252                        case '\t':
1253                            break;
1254
1255                        case '%':
1256                            pent(' ');
1257                            break;
1258
1259                        default:
1260                            bkch();
1261                            st = -1;
1262                            break;
1263                    }
1264                    break;
1265
1266                case 5:     // read the default value
1267                    switch (ch) {
1268                        case '\"':
1269                        case '\'':
1270                            bkch();
1271                            bqstr('d');  // the value in the mBuff now
1272                            att.list = pair(null);
1273                            //          Create a string like "attqname='value' "
1274                            att.list.chars = new char[att.chars.length + mBuffIdx + 3];
1275                            System.arraycopy(
1276                                    att.chars, 1, att.list.chars, 0, att.chars.length - 1);
1277                            att.list.chars[att.chars.length - 1] = '=';
1278                            att.list.chars[att.chars.length] = ch;
1279                            System.arraycopy(
1280                                    mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
1281                            att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
1282                            att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1283                            st = -1;
1284                            break;
1285
1286                        default:
1287                            panic(FAULT);
1288                            break;
1289                    }
1290                    break;
1291
1292                default:
1293                    panic(FAULT);
1294                    break;
1295            }
1296        }
1297    }
1298
1299    /**
1300     * Parses a notation declaration.
1301     *
1302     * This method parses the declaration up to the closing angle bracket.
1303     *
1304     * @exception Exception is parser specific exception form panic method.
1305     * @exception IOException
1306     */
1307    private void dtdnot() throws Exception {
1308        wsskip();
1309        String name = name(false);
1310        wsskip();
1311        Pair ids = pubsys('N');
1312        notDecl(name, ids.name, ids.value);
1313        del(ids);
1314    }
1315
1316    /**
1317     * Parses an attribute.
1318     *
1319     * This recursive method is responsible for prefix addition
1320     * (
1321     * <code>mPref</code>) on the way down. The element's start tag end triggers
1322     * the return process. The method then on it's way back resolves prefixes
1323     * and accumulates attributes.
1324     *
1325     * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
1326     * declared in DTD (attribute decalration had been read); 0x2 - attribute's
1327     * default value is used.</p>
1328     *
1329     * @param att An object which reprecents current attribute.
1330     * @exception Exception is parser specific exception form panic method.
1331     * @exception IOException
1332     */
1333    @SuppressWarnings("fallthrough")
1334    private void attr(Pair att) throws Exception {
1335        switch (wsskip()) {
1336            case '/':
1337            case '>':
1338                if ((att.num & 0x2) == 0) {  // all attributes have been read
1339                    att.num |= 0x2;  // set default attribute flag
1340                    Input inp = mInp;
1341                    //          Go through all attributes defined on current element.
1342                    for (Pair def = mElm.list; def != null; def = def.next) {
1343                        if (def.list == null) // no default value
1344                        {
1345                            continue;
1346                        }
1347                        //              Go through all attributes defined on current
1348                        //              element and add defaults.
1349                        Pair act = find(att.next, def.chars);
1350                        if (act == null) {
1351                            push(new Input(def.list.chars));
1352                        }
1353                    }
1354                    if (mInp != inp) {  // defaults have been added
1355                        attr(att);
1356                        return;
1357                    }
1358                }
1359                //              Ensure the attribute string array capacity
1360                mAttrs.setLength(mAttrIdx);
1361                mItems = mAttrs.mItems;
1362                return;
1363
1364            case EOS:
1365                panic(FAULT);
1366
1367            default:
1368                //              Read the attribute name and value
1369                att.chars = qname(mIsNSAware);
1370                att.name = att.local();
1371                String type = atype(att);  // sets attribute's type on att.id
1372                wsskip();
1373                if (getch() != '=') {
1374                    panic(FAULT);
1375                }
1376                bqstr((char) att.id);   // read the value with normalization.
1377                String val = new String(mBuff, 1, mBuffIdx);
1378                Pair next = pair(att);
1379                next.num = (att.num & ~0x1);  // inherit attribute flags
1380                //              Put a namespace declaration on top of the prefix stack
1381                if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
1382                    //          An ordinary attribute
1383                    mAttrIdx++;
1384                    attr(next);     // recursive call to parse the next attribute
1385                    mAttrIdx--;
1386                    //          Add the attribute to the attributes string array
1387                    char idx = (char) (mAttrIdx << 3);
1388                    mItems[idx + 1] = att.qname();  // attr qname
1389                    mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
1390                    mItems[idx + 3] = val;          // attr value
1391                    mItems[idx + 4] = type;         // attr type
1392                    switch (att.num & 0x3) {
1393                        case 0x0:
1394                            mItems[idx + 5] = null;
1395                            break;
1396
1397                        case 0x1:  // declared attribute
1398                            mItems[idx + 5] = "d";
1399                            break;
1400
1401                        default:  // 0x2, 0x3 - default attribute always declared
1402                            mItems[idx + 5] = "D";
1403                            break;
1404                    }
1405                    //          Resolve the prefix if any and report the attribute
1406                    //          NOTE: The attribute does not accept the default namespace.
1407                    mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
1408                } else {
1409                    //          A namespace declaration. mPref.name contains prefix and
1410                    //          mPref.value contains namespace URI set by isdecl method.
1411                    //          Report a start of the new mapping
1412                    newPrefix();
1413                    //          Recursive call to parse the next attribute
1414                    attr(next);
1415                    //          NOTE: The namespace declaration is not reported.
1416                }
1417                del(next);
1418                break;
1419        }
1420    }
1421
1422    /**
1423     * Retrieves attribute type.
1424     *
1425     * This method sets the type of normalization in the attribute
1426     * <code>id</code> field and returns the name of attribute type.
1427     *
1428     * @param att An object which represents current attribute.
1429     * @return The name of the attribute type.
1430     * @exception Exception is parser specific exception form panic method.
1431     */
1432    private String atype(Pair att)
1433            throws Exception {
1434        Pair attr;
1435
1436        // CDATA-type normalization by default [#3.3.3]
1437        att.id = 'c';
1438        if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) {
1439            return "CDATA";
1440        }
1441
1442        att.num |= 0x1;  // attribute is declared
1443
1444        // Non-CDATA normalization except when the attribute type is CDATA.
1445        att.id = 'i';
1446        switch (attr.id) {
1447            case 'i':
1448                return "ID";
1449
1450            case 'r':
1451                return "IDREF";
1452
1453            case 'R':
1454                return "IDREFS";
1455
1456            case 'n':
1457                return "ENTITY";
1458
1459            case 'N':
1460                return "ENTITIES";
1461
1462            case 't':
1463                return "NMTOKEN";
1464
1465            case 'T':
1466                return "NMTOKENS";
1467
1468            case 'u':
1469                return "NMTOKEN";
1470
1471            case 'o':
1472                return "NOTATION";
1473
1474            case 'c':
1475                att.id = 'c';
1476                return "CDATA";
1477
1478            default:
1479                panic(FAULT);
1480        }
1481        return null;
1482    }
1483
1484    /**
1485     * Parses a comment.
1486     *
1487     * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1488     * with first &apos;-&apos; after &apos;&lt;!&apos;.
1489     *
1490     * @exception Exception is parser specific exception form panic method.
1491     */
1492    @SuppressWarnings("fallthrough")
1493    private void comm() throws Exception {
1494        if (mPh == PH_DOC_START) {
1495            mPh = PH_MISC_DTD;  // misc before DTD
1496        }               // '<!' has been already read by dispetcher.
1497        char ch;
1498        mBuffIdx = -1;
1499        for (short st = 0; st >= 0;) {
1500            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
1501            if (ch == EOS) {
1502                panic(FAULT);
1503            }
1504            switch (st) {
1505                case 0:     // first '-' of the comment open
1506                    if (ch == '-') {
1507                        st = 1;
1508                    } else {
1509                        panic(FAULT);
1510                    }
1511                    break;
1512
1513                case 1:     // secind '-' of the comment open
1514                    if (ch == '-') {
1515                        st = 2;
1516                    } else {
1517                        panic(FAULT);
1518                    }
1519                    break;
1520
1521                case 2:     // skip the comment body
1522                    switch (ch) {
1523                        case '-':
1524                            st = 3;
1525                            break;
1526
1527                        default:
1528                            bappend(ch);
1529                            break;
1530                    }
1531                    break;
1532
1533                case 3:     // second '-' of the comment close
1534                    switch (ch) {
1535                        case '-':
1536                            st = 4;
1537                            break;
1538
1539                        default:
1540                            bappend('-');
1541                            bappend(ch);
1542                            st = 2;
1543                            break;
1544                    }
1545                    break;
1546
1547                case 4:     // '>' of the comment close
1548                    if (ch == '>') {
1549                        comm(mBuff, mBuffIdx + 1);
1550                        st = -1;
1551                        break;
1552                    }
1553                // else - panic [#2.5 compatibility note]
1554
1555                default:
1556                    panic(FAULT);
1557            }
1558        }
1559    }
1560
1561    /**
1562     * Parses a processing instruction.
1563     *
1564     * The &apos;&lt;?&apos; is read in dispatcher so the method starts with
1565     * first character of PI target name after &apos;&lt;?&apos;.
1566     *
1567     * @exception Exception is parser specific exception form panic method.
1568     * @exception IOException
1569     */
1570    private void pi() throws Exception {
1571        // '<?' has been already read by dispetcher.
1572        char ch;
1573        String str = null;
1574        mBuffIdx = -1;
1575        for (short st = 0; st >= 0;) {
1576            ch = getch();
1577            if (ch == EOS) {
1578                panic(FAULT);
1579            }
1580            switch (st) {
1581                case 0:     // read the PI target name
1582                    switch (chtyp(ch)) {
1583                        case 'a':
1584                        case 'A':
1585                        case '_':
1586                        case ':':
1587                        case 'X':
1588                            bkch();
1589                            str = name(false);
1590                            //          PI target name may not be empty string [#2.6]
1591                            //          PI target name 'XML' is reserved [#2.6]
1592                            if ((str.length() == 0)
1593                                    || (mXml.name.equals(str.toLowerCase()) == true)) {
1594                                panic(FAULT);
1595                            }
1596                            //          This is processing instruction
1597                            if (mPh == PH_DOC_START) // the begining of the document
1598                            {
1599                                mPh = PH_MISC_DTD;    // misc before DTD
1600                            }
1601                            wsskip();  // skip spaces after the PI target name
1602                            st = 1;    // accumulate the PI body
1603                            mBuffIdx = -1;
1604                            break;
1605
1606                        default:
1607                            panic(FAULT);
1608                    }
1609                    break;
1610
1611                case 1:     // accumulate the PI body
1612                    switch (ch) {
1613                        case '?':
1614                            st = 2;  // end of the PI body
1615                            break;
1616
1617                        default:
1618                            bappend(ch);
1619                            break;
1620                    }
1621                    break;
1622
1623                case 2:     // end of the PI body
1624                    switch (ch) {
1625                        case '>':
1626                            //          PI has been read.
1627                            pi(str, new String(mBuff, 0, mBuffIdx + 1));
1628                            st = -1;
1629                            break;
1630
1631                        case '?':
1632                            bappend('?');
1633                            break;
1634
1635                        default:
1636                            bappend('?');
1637                            bappend(ch);
1638                            st = 1;  // accumulate the PI body
1639                            break;
1640                    }
1641                    break;
1642
1643                default:
1644                    panic(FAULT);
1645            }
1646        }
1647    }
1648
1649    /**
1650     * Parses a character data.
1651     *
1652     * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1653     * with first &apos;[&apos; after &apos;&lt;!&apos;.
1654     *
1655     * @exception Exception is parser specific exception form panic method.
1656     * @exception IOException
1657     */
1658    private void cdat()
1659            throws Exception {
1660        // '<!' has been already read by dispetcher.
1661        char ch;
1662        mBuffIdx = -1;
1663        for (short st = 0; st >= 0;) {
1664            ch = getch();
1665            switch (st) {
1666                case 0:     // the first '[' of the CDATA open
1667                    if (ch == '[') {
1668                        st = 1;
1669                    } else {
1670                        panic(FAULT);
1671                    }
1672                    break;
1673
1674                case 1:     // read "CDATA"
1675                    if (chtyp(ch) == 'A') {
1676                        bappend(ch);
1677                    } else {
1678                        if ("CDATA".equals(
1679                                new String(mBuff, 0, mBuffIdx + 1)) != true) {
1680                            panic(FAULT);
1681                        }
1682                        bkch();
1683                        st = 2;
1684                    }
1685                    break;
1686
1687                case 2:     // the second '[' of the CDATA open
1688                    if (ch != '[') {
1689                        panic(FAULT);
1690                    }
1691                    mBuffIdx = -1;
1692                    st = 3;
1693                    break;
1694
1695                case 3:     // read data before the first ']'
1696                    if (ch != ']') {
1697                        bappend(ch);
1698                    } else {
1699                        st = 4;
1700                    }
1701                    break;
1702
1703                case 4:     // read the second ']' or continue to read the data
1704                    if (ch != ']') {
1705                        bappend(']');
1706                        bappend(ch);
1707                        st = 3;
1708                    } else {
1709                        st = 5;
1710                    }
1711                    break;
1712
1713                case 5:     // read '>' or continue to read the data
1714                    switch (ch) {
1715                        case ']':
1716                            bappend(']');
1717                            break;
1718
1719                        case '>':
1720                            bflash();
1721                            st = -1;
1722                            break;
1723
1724                        default:
1725                            bappend(']');
1726                            bappend(']');
1727                            bappend(ch);
1728                            st = 3;
1729                            break;
1730                    }
1731                    break;
1732
1733                default:
1734                    panic(FAULT);
1735            }
1736        }
1737    }
1738
1739    /**
1740     * Reads a xml name.
1741     *
1742     * The xml name must conform "Namespaces in XML" specification. Therefore
1743     * the ':' character is not allowed in the name. This method should be used
1744     * for PI and entity names which may not have a namespace according to the
1745     * specification mentioned above.
1746     *
1747     * @param ns The true value turns namespace conformance on.
1748     * @return The name has been read.
1749     * @exception Exception When incorrect character appear in the name.
1750     * @exception IOException
1751     */
1752    protected String name(boolean ns)
1753            throws Exception {
1754        mBuffIdx = -1;
1755        bname(ns);
1756        return new String(mBuff, 1, mBuffIdx);
1757    }
1758
1759    /**
1760     * Reads a qualified xml name.
1761     *
1762     * The characters of a qualified name is an array of characters. The first
1763     * (chars[0]) character is the index of the colon character which separates
1764     * the prefix from the local name. If the index is zero, the name does not
1765     * contain separator or the parser works in the namespace unaware mode. The
1766     * length of qualified name is the length of the array minus one.
1767     *
1768     * @param ns The true value turns namespace conformance on.
1769     * @return The characters of a qualified name.
1770     * @exception Exception When incorrect character appear in the name.
1771     * @exception IOException
1772     */
1773    protected char[] qname(boolean ns)
1774            throws Exception {
1775        mBuffIdx = -1;
1776        bname(ns);
1777        char chars[] = new char[mBuffIdx + 1];
1778        System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1779        return chars;
1780    }
1781
1782    /**
1783     * Reads the public or/and system identifiers.
1784     *
1785     * @param inp The input object.
1786     * @exception Exception is parser specific exception form panic method.
1787     * @exception IOException
1788     */
1789    private void pubsys(Input inp)
1790            throws Exception {
1791        Pair pair = pubsys(' ');
1792        inp.pubid = pair.name;
1793        inp.sysid = pair.value;
1794        del(pair);
1795    }
1796
1797    /**
1798     * Reads the public or/and system identifiers.
1799     *
1800     * @param flag The 'N' allows public id be without system id.
1801     * @return The public or/and system identifiers pair.
1802     * @exception Exception is parser specific exception form panic method.
1803     * @exception IOException
1804     */
1805    @SuppressWarnings("fallthrough")
1806    private Pair pubsys(char flag) throws Exception {
1807        Pair ids = pair(null);
1808        String str = name(false);
1809        if ("PUBLIC".equals(str) == true) {
1810            bqstr('i');  // non-CDATA normalization [#4.2.2]
1811            ids.name = new String(mBuff, 1, mBuffIdx);
1812            switch (wsskip()) {
1813                case '\"':
1814                case '\'':
1815                    bqstr(' ');
1816                    ids.value = new String(mBuff, 1, mBuffIdx);
1817                    break;
1818
1819                case EOS:
1820                    panic(FAULT);
1821
1822                default:
1823                    if (flag != 'N') // [#4.7]
1824                    {
1825                        panic(FAULT);
1826                    }
1827                    ids.value = null;
1828                    break;
1829            }
1830            return ids;
1831        } else if ("SYSTEM".equals(str) == true) {
1832            ids.name = null;
1833            bqstr(' ');
1834            ids.value = new String(mBuff, 1, mBuffIdx);
1835            return ids;
1836        }
1837        panic(FAULT);
1838        return null;
1839    }
1840
1841    /**
1842     * Reads an attribute value.
1843     *
1844     * The grammar this method can read is:
1845     * <pre>{@code
1846     * eqstr := S "=" qstr
1847     * qstr  := S ("'" string "'") | ('"' string '"')
1848     * }</pre>
1849     * This method resolves entities
1850     * inside a string unless the parser parses DTD.
1851     *
1852     * @param flag The '=' character forces the method to accept the '='
1853     * character before quoted string and read the following string as not an
1854     * attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
1855     * '-' - not an attribute value; 'd' - in DTD context.
1856     * @return The content of the quoted strign as a string.
1857     * @exception Exception is parser specific exception form panic method.
1858     * @exception IOException
1859     */
1860    protected String eqstr(char flag) throws Exception {
1861        if (flag == '=') {
1862            wsskip();
1863            if (getch() != '=') {
1864                panic(FAULT);
1865            }
1866        }
1867        bqstr((flag == '=') ? '-' : flag);
1868        return new String(mBuff, 1, mBuffIdx);
1869    }
1870
    /**
     * Resolves an entity.
     *
     * This method resolves built-in and character entity references. It also
     * reports external entities to the application through
     * <code>resolveEnt</code>. The leading '&amp;' is expected to have been
     * consumed by the caller; the method reads up to and including the
     * terminating ';'.
     *
     * Outcomes, as implemented below:
     * <ul>
     * <li>built-in entity - the recognized character replaces the reference
     * in the buffer;</li>
     * <li>character reference (decimal or hex) - the character is appended to
     * the buffer; values of 0xffff and above are rejected;</li>
     * <li>general entity while in the DTD phase - the reference text is left
     * in the buffer "as is";</li>
     * <li>declared internal entity - its input is pushed on the input
     * stack;</li>
     * <li>declared external entity - a new input is pushed if
     * <code>resolveEnt</code> returns a source;</li>
     * <li>otherwise the entity is unresolved: its name is returned when
     * <code>flag</code> is 'x', any other flag leads to panic.</li>
     * </ul>
     *
     * @param flag The 'x' character forces the method to report a skipped
     * entity; 'i' character - indicates non-CDATA normalization.
     * @return Name of unresolved entity or <code>null</code> if entity had been
     * resolved successfully.
     * @exception Exception is the parser-specific exception raised from the
     * panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private String ent(char flag) throws Exception {
        char ch;
        // Buffer index the '&' below is expected to land on; the reference
        // text is collected after it and the offset is restored from it later.
        int idx = mBuffIdx + 1;
        Input inp = null;
        String str = null;
        mESt = 0x100;  // reset the built-in entity recognizer
        bappend('&');
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // the first character of the entity name
                case 1:     // read built-in entity name
                    switch (chtyp(ch)) {
                        case 'd':
                        case '.':
                        case '-':
                            // Digits, '.' and '-' may not start a name.
                            if (st != 1) {
                                panic(FAULT);
                            }
                            // deliberate fall through: append as a name character
                        case 'a':
                        case 'A':
                        case '_':
                        case 'X':
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ':':
                            // A colon in an entity name is rejected in namespace aware mode.
                            if (mIsNSAware != false) {
                                panic(FAULT);
                            }
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ';':
                            if (mESt < 0x100) {
                                // The entity is a built-in entity: replace
                                // the reference text with the character.
                                mBuffIdx = idx - 1;
                                bappend(mESt);
                                st = -1;
                                break;
                            } else if (mPh == PH_DTD) {
                                // In DTD entity declaration has to resolve character
                                // entities and include "as is" others. [#4.4.7]
                                bappend(';');
                                st = -1;
                                break;
                            }
                            // Convert an entity name to a string
                            str = new String(mBuff, idx + 1, mBuffIdx - idx);
                            inp = mEnt.get(str);
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (inp != null) {
                                if (inp.chars == null) {
                                    // External entity
                                    InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                    if (is != null) {
                                        push(new Input(BUFFSIZE_READER));
                                        setinp(is);
                                        mInp.pubid = inp.pubid;
                                        mInp.sysid = inp.sysid;
                                        str = null;  // the entity is resolved
                                    } else {
                                        // Unresolved external entity
                                        if (flag != 'x') {
                                            panic(FAULT);  // unknown entity within markup
                                        }
                                        // str is name of unresolved entity
                                    }
                                } else {
                                    // Internal entity
                                    push(inp);
                                    str = null;  // the entity is resolved
                                }
                            } else {
                                // Unknown or general unparsed entity
                                if (flag != 'x') {
                                    panic(FAULT);  // unknown entity within markup
                                }
                                // str is name of unresolved entity
                            }
                            st = -1;
                            break;

                        case '#':
                            // '#' is only valid right after '&'.
                            if (st != 0) {
                                panic(FAULT);
                            }
                            st = 2;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 2:     // read character entity
                    switch (chtyp(ch)) {
                        case 'd':
                            bappend(ch);
                            break;

                        case ';':
                            // Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                                // 0xffff is the EOS marker; larger values do not fit a char
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                // empty or out-of-range digit string
                                panic(FAULT);
                            }
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // A space, or any character read while a nested
                            // input is active, goes through the flag-aware append.
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        case 'a':
                            // If the entity buffer is empty and ch == 'x'
                            // this is the start of a hex reference "&#x".
                            if ((mBuffIdx == idx) && (ch == 'x')) {
                                st = 3;
                                break;
                            }
                            // deliberate fall through: any other letter is an error
                        default:
                            panic(FAULT);
                    }
                    break;

                case 3:     // read hex character entity
                    switch (chtyp(ch)) {
                        case 'A':
                        case 'a':
                        case 'd':
                            // Any letter is collected here; non-hex letters
                            // are rejected by Integer.parseInt below.
                            bappend(ch);
                            break;

                        case ';':
                            // Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                                // 0xffff is the EOS marker; larger values do not fit a char
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                // empty, malformed or out-of-range hex string
                                panic(FAULT);
                            }
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // Same append rule as for the decimal reference above.
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }

        return str;
    }
2064
2065    /**
2066     * Resoves a parameter entity.
2067     *
2068     * This method resolves a parameter entity references. It is also reports
2069     * external entities to the application.
2070     *
2071     * @param flag The '-' instruct the method to do not set up surrounding
2072     * spaces [#4.4.8].
2073     * @exception Exception is parser specific exception form panic method.
2074     * @exception IOException
2075     */
2076    @SuppressWarnings("fallthrough")
2077    private void pent(char flag) throws Exception {
2078        char ch;
2079        int idx = mBuffIdx + 1;
2080        Input inp = null;
2081        String str = null;
2082        bappend('%');
2083        if (mPh != PH_DTD) // the DTD internal subset
2084        {
2085            return;         // Not Recognized [#4.4.1]
2086        }               //              Read entity name
2087        bname(false);
2088        str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2089        if (getch() != ';') {
2090            panic(FAULT);
2091        }
2092        inp = mPEnt.get(str);
2093        //              Restore the buffer offset
2094        mBuffIdx = idx - 1;
2095        if (inp != null) {
2096            if (inp.chars == null) {
2097                //              External parameter entity
2098                InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
2099                if (is != null) {
2100                    if (flag != '-') {
2101                        bappend(' ');  // tail space
2102                    }
2103                    push(new Input(BUFFSIZE_READER));
2104                    // BUG: there is no leading space! [#4.4.8]
2105                    setinp(is);
2106                    mInp.pubid = inp.pubid;
2107                    mInp.sysid = inp.sysid;
2108                } else {
2109                    //          Unresolved external parameter entity
2110                    skippedEnt("%" + str);
2111                }
2112            } else {
2113                //              Internal parameter entity
2114                if (flag == '-') {
2115                    //          No surrounding spaces
2116                    inp.chIdx = 1;
2117                } else {
2118                    //          Insert surrounding spaces
2119                    bappend(' ');  // tail space
2120                    inp.chIdx = 0;
2121                }
2122                push(inp);
2123            }
2124        } else {
2125            //          Unknown parameter entity
2126            skippedEnt("%" + str);
2127        }
2128    }
2129
2130    /**
2131     * Recognizes and handles a namespace declaration.
2132     *
2133     * This method identifies a type of namespace declaration if any and puts
2134     * new mapping on top of prefix stack.
2135     *
2136     * @param name The attribute qualified name (<code>name.value</code> is a
2137     * <code>String</code> object which represents the attribute prefix).
2138     * @param value The attribute value.
2139     * @return <code>true</code> if a namespace declaration is recognized.
2140     */
2141    private boolean isdecl(Pair name, String value) {
2142        if (name.chars[0] == 0) {
2143            if ("xmlns".equals(name.name) == true) {
2144                //              New default namespace declaration
2145                mPref = pair(mPref);
2146                mPref.list = mElm;  // prefix owner element
2147                mPref.value = value;
2148                mPref.name = "";
2149                mPref.chars = NONS;
2150                mElm.num++;  // namespace counter
2151                return true;
2152            }
2153        } else {
2154            if (name.eqpref(XMLNS) == true) {
2155                //              New prefix declaration
2156                int len = name.name.length();
2157                mPref = pair(mPref);
2158                mPref.list = mElm;  // prefix owner element
2159                mPref.value = value;
2160                mPref.name = name.name;
2161                mPref.chars = new char[len + 1];
2162                mPref.chars[0] = (char) (len + 1);
2163                name.name.getChars(0, len, mPref.chars, 1);
2164                mElm.num++;  // namespace counter
2165                return true;
2166            }
2167        }
2168        return false;
2169    }
2170
2171    /**
2172     * Resolves a prefix.
2173     *
2174     * @return The namespace assigned to the prefix.
2175     * @exception Exception When mapping for specified prefix is not found.
2176     */
2177    private String rslv(char[] qname)
2178            throws Exception {
2179        for (Pair pref = mPref; pref != null; pref = pref.next) {
2180            if (pref.eqpref(qname) == true) {
2181                return pref.value;
2182            }
2183        }
2184        if (qname[0] == 1) {  // QNames like ':local'
2185            for (Pair pref = mPref; pref != null; pref = pref.next) {
2186                if (pref.chars[0] == 0) {
2187                    return pref.value;
2188                }
2189            }
2190        }
2191        panic(FAULT);
2192        return null;
2193    }
2194
2195    /**
2196     * Skips xml white space characters.
2197     *
2198     * This method skips white space characters (' ', '\t', '\n', '\r') and
2199     * looks ahead not white space character.
2200     *
2201     * @return The first not white space look ahead character.
2202     * @exception IOException
2203     */
2204    protected char wsskip()
2205            throws IOException {
2206        char ch;
2207        while (true) {
2208            //          Read next character
2209            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2210            if (ch < 0x80) {
2211                if (nmttyp[ch] != 3) // [ \t\n\r]
2212                {
2213                    break;
2214                }
2215            } else {
2216                break;
2217            }
2218        }
2219        mChIdx--;  // bkch();
2220        return ch;
2221    }
2222
    /**
     * Reports document type.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity or <code>null</code>.
     * @param sysid The system identifier of the entity or <code>null</code>.
     * @exception SAXException may be raised by the implementing class.
     */
    protected abstract void docType(String name, String pubid, String sysid)
            throws SAXException;
2232
    /**
     * Reports a comment.
     *
     * The comment parser of this class calls this method with its working
     * buffer once the closing &apos;--&gt;&apos; has been read.
     * NOTE(review): since the array is the parser's own buffer, implementations
     * presumably must copy the text if they need to keep it - confirm.
     *
     * @param text The comment text starting from the first character.
     * @param length The number of characters in comment.
     */
    protected abstract void comm(char[] text, int length);
2240
    /**
     * Reports a processing instruction.
     *
     * The processing instruction parser of this class calls this method once
     * the closing &apos;?&gt;&apos; has been read.
     *
     * @param target The processing instruction target name.
     * @param body The processing instruction body text.
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract void pi(String target, String body)
            throws Exception;
2249
    /**
     * Reports new namespace prefix. The Namespace prefix (
     * <code>mPref.name</code>) being declared and the Namespace URI (
     * <code>mPref.value</code>) the prefix is mapped to. An empty string is
     * used for the default element namespace, which has no prefix.
     *
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract void newPrefix()
            throws Exception;
2258
    /**
     * Reports skipped entity name.
     *
     * @param name The entity name. For an unknown or unresolved parameter
     * entity the parser passes the name prefixed with the '%' character.
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract void skippedEnt(String name)
            throws Exception;
2266
    /**
     * Returns an
     * <code>InputSource</code> for specified entity or
     * <code>null</code>.
     *
     * The entity parsers of this class treat a <code>null</code> result as an
     * unresolved external entity.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity.
     * @param sysid The system identifier of the entity.
     * @return The input source to read the entity from, or <code>null</code>.
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract InputSource resolveEnt(
            String name, String pubid, String sysid)
            throws Exception;
2279
    /**
     * Reports notation declaration.
     *
     * @param name The notation's name.
     * @param pubid The notation's public identifier, or null if none was given.
     * @param sysid The notation's system identifier, or null if none was given.
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract void notDecl(String name, String pubid, String sysid)
            throws Exception;
2289
    /**
     * Reports unparsed entity name.
     *
     * @param name The unparsed entity's name.
     * @param pubid The entity's public identifier, or null if none was given.
     * @param sysid The entity's system identifier.
     * @param notation The name of the associated notation.
     * @exception Exception may be raised by the implementing class.
     */
    protected abstract void unparsedEntDecl(
            String name, String pubid, String sysid, String notation)
            throws Exception;
2301
    /**
     * Notifies the handler about fatal parsing error.
     *
     * NOTE(review): the parsing routines of this class continue with
     * placeholder code after calling this method (for example
     * <code>return null</code>), so implementations are presumably expected
     * to always throw - confirm against the implementing classes.
     *
     * @param msg The problem description message.
     * @exception Exception the parser-specific exception which reports the
     * error.
     */
    protected abstract void panic(String msg)
            throws Exception;
2309
    /**
     * Reads a qualified xml name.
     *
     * This is a low level routine which leaves a qName in the buffer. The
     * characters of a qualified name are stored as an array of characters. The
     * first (chars[0]) character is the index of the colon character which
     * separates the prefix from the local name. If the index is zero, the name
     * does not contain a separator or the parser works in the namespace
     * unaware mode. The length of the qualified name is the length of the
     * array minus one.
     *
     * Characters are not appended one by one: the method only counts them
     * while scanning mChars and copies whole runs into mBuff with bcopy,
     * either when the character buffer is exhausted or when the name ends.
     *
     * @param ns The true value turns namespace conformance on.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException If any IO errors occur.
     */
    private void bname(boolean ns)
            throws Exception {
        char ch;
        char type;
        mBuffIdx++;  // allocate a char for colon offset
        int bqname = mBuffIdx;     // buffer slot that receives the colon offset
        int bcolon = bqname;       // colon position; equal to bqname means "no colon yet"
        int bchidx = bqname + 1;   // buffer index the next name character will occupy
        int bstart = bchidx;       // buffer index where the pending run will be copied to
        int cstart = mChIdx;       // index in mChars where the pending run starts
        //              States: 0/1 - first/other chars of the prefix,
        //              2/3 - first/other chars of the local name. Without
        //              namespace support the whole name is read in 2/3.
        short st = (short) ((ns == true) ? 0 : 2);
        while (true) {
            //          Read next character
            if (mChIdx >= mChLen) {
                //      The character buffer is exhausted: save the run read
                //      so far, then let getch() supply the next character
                //      (presumably by refilling mChars - confirm) and step
                //      back so that it is re-read below.
                bcopy(cstart, bstart);
                getch();
                mChIdx--;  // bkch();
                cstart = mChIdx;
                bstart = bchidx;
            }
            ch = mChars[mChIdx++];
            //          Classify the character; everything at or above 0x80
            //          except EOS is treated as a name start character.
            type = (char) 0;  // [X]
            if (ch < 0x80) {
                type = (char) nmttyp[ch];
            } else if (ch == EOS) {
                panic(FAULT);
            }
            //          Parse QName
            switch (st) {
                case 0:     // read the first char of the prefix
                case 2:     // read the first char of the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                            bchidx++;  // append char to the buffer
                            st++;      // (st == 0)? 1: 3;
                            break;

                        case 1:  // [:]
                            //  A leading colon is not consumed here; it is
                            //  pushed back and handled by state 1 or 3.
                            mChIdx--;  // bkch();
                            st++;      // (st == 0)? 1: 3;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 1:     // read the prefix
                case 3:     // read the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                        case 2:  // [.-d]
                            bchidx++;  // append char to the buffer
                            break;

                        case 1:  // [:]
                            bchidx++;  // append char to the buffer
                            if (ns == true) {
                                if (bcolon != bqname) {
                                    panic(FAULT);  // it must be only one colon
                                }
                                //      Remember where the colon sits in the
                                //      buffer and continue with the local name.
                                bcolon = bchidx - 1;
                                if (st == 1) {
                                    st = 2;
                                }
                            }
                            break;

                        default:
                            //  End of the name: push the terminator back,
                            //  copy the pending run and store the colon
                            //  offset (zero when there is no colon).
                            mChIdx--;  // bkch();
                            bcopy(cstart, bstart);
                            mBuff[bqname] = (char) (bcolon - bqname);
                            return;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
    }
2405
2406    /**
2407     * Reads a nmtoken.
2408     *
2409     * This is low level routine which leaves a nmtoken in the buffer.
2410     *
2411     * @exception Exception is parser specific exception form panic method.
2412     * @exception IOException
2413     */
2414    @SuppressWarnings("fallthrough")
2415    private void bntok() throws Exception {
2416        char ch;
2417        mBuffIdx = -1;
2418        bappend((char) 0);  // default offset to the colon char
2419        while (true) {
2420            ch = getch();
2421            switch (chtyp(ch)) {
2422                case 'a':
2423                case 'A':
2424                case 'd':
2425                case '.':
2426                case ':':
2427                case '-':
2428                case '_':
2429                case 'X':
2430                    bappend(ch);
2431                    break;
2432
2433                case 'Z':
2434                    panic(FAULT);
2435
2436                default:
2437                    bkch();
2438                    return;
2439            }
2440        }
2441    }
2442
2443    /**
2444     * Recognizes a keyword.
2445     *
2446     * This is low level routine which recognizes one of keywords in the buffer.
2447     * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
2448     * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
2449     * Q IMPLIED - I FIXED - F
2450     *
2451     * @return an id of a keyword or '?'.
2452     * @exception Exception is parser specific exception form panic method.
2453     * @exception IOException
2454     */
2455    private char bkeyword()
2456            throws Exception {
2457        String str = new String(mBuff, 1, mBuffIdx);
2458        switch (str.length()) {
2459            case 2:  // ID
2460                return ("ID".equals(str) == true) ? 'i' : '?';
2461
2462            case 5:  // IDREF, CDATA, FIXED
2463                switch (mBuff[1]) {
2464                    case 'I':
2465                        return ("IDREF".equals(str) == true) ? 'r' : '?';
2466                    case 'C':
2467                        return ("CDATA".equals(str) == true) ? 'c' : '?';
2468                    case 'F':
2469                        return ("FIXED".equals(str) == true) ? 'F' : '?';
2470                    default:
2471                        break;
2472                }
2473                break;
2474
2475            case 6:  // IDREFS, ENTITY
2476                switch (mBuff[1]) {
2477                    case 'I':
2478                        return ("IDREFS".equals(str) == true) ? 'R' : '?';
2479                    case 'E':
2480                        return ("ENTITY".equals(str) == true) ? 'n' : '?';
2481                    default:
2482                        break;
2483                }
2484                break;
2485
2486            case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2487                switch (mBuff[1]) {
2488                    case 'I':
2489                        return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2490                    case 'N':
2491                        return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2492                    case 'A':
2493                        return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2494                    case 'E':
2495                        return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2496                    default:
2497                        break;
2498                }
2499                break;
2500
2501            case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2502                switch (mBuff[2]) {
2503                    case 'N':
2504                        return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2505                    case 'M':
2506                        return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2507                    case 'O':
2508                        return ("NOTATION".equals(str) == true) ? 'o' : '?';
2509                    case 'E':
2510                        return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2511                    default:
2512                        break;
2513                }
2514                break;
2515
2516            default:
2517                break;
2518        }
2519        return '?';
2520    }
2521
    /**
     * Reads a single or double quoted string into the buffer.
     *
     * This method resolves entities inside a string unless the parser parses
     * DTD. The buffer is reset first; its first slot is set to zero (the
     * default colon offset) and the string characters follow, without the
     * enclosing quotes.
     *
     * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
     * not an attribute value; 'd' - in DTD context.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException If any IO errors occur.
     */
    @SuppressWarnings("fallthrough")
    private void bqstr(char flag) throws Exception {
        Input inp = mInp;  // remember the original input
        mBuffIdx = -1;
        bappend((char) 0);  // default offset to the colon char
        char ch;
        //              States: 0 - looking for the opening quote, 2 - inside
        //              a single quoted string, 3 - inside a double quoted
        //              string, negative - done.
        for (short st = 0; st >= 0;) {
            //          Take the character straight from mChars when one is
            //          available, otherwise go through getch().
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // read a single or double quote
                    switch (ch) {
                        case ' ':
                        case '\n':
                        case '\r':
                        case '\t':
                            //  skip white space before the opening quote
                            break;

                        case '\'':
                            st = 2;  // read a single quoted string
                            break;

                        case '\"':
                            st = 3;  // read a double quoted string
                            break;

                        default:
                            panic(FAULT);
                            break;
                    }
                    break;

                case 2:     // read a single quoted string
                case 3:     // read a double quoted string
                    switch (ch) {
                        case '\'':
                            //  A quote closes the string only if it is of
                            //  the opening kind and the current input is the
                            //  one the string started in; otherwise it is
                            //  ordinary content.
                            if ((st == 2) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '\"':
                            if ((st == 3) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '&':
                            //  General entity references are handed to
                            //  ent() outside of DTD context and kept as
                            //  text inside it.
                            if (flag != 'd') {
                                ent(flag);
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '%':
                            //  Parameter entity references are handed to
                            //  pent() in DTD context only.
                            if (flag == 'd') {
                                pent('-');
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '<':
                            //  '<' is rejected in attribute values but is
                            //  allowed for the '-' and 'd' flags.
                            if ((flag == '-') || (flag == 'd')) {
                                bappend(ch);
                            } else {
                                panic(FAULT);
                            }
                            break;

                        case EOS:               // EOS before single/double quote
                            panic(FAULT);
                            //  no break: falls through if panic returns

                        case '\r':     // EOL processing [#2.11 & #3.3.3]
                            //  Only when normalization is on and mInp.next
                            //  is null: "\r\n" and a lone "\r" both become
                            //  a single "\n".
                            if (flag != ' ' && mInp.next == null) {
                                if (getch() != '\n') {
                                    bkch();
                                }
                                ch = '\n';
                            }
                            //  no break: the character is appended below
                        default:
                            bappend(ch, flag);
                            break;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
        //              There is maximum one space at the end of the string in
        //              i-mode (non CDATA normalization) and it has to be removed.
        if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
            mBuffIdx -= 1;
        }
    }
2633
    /**
     * Reports characters and empties the parser's buffer. This method is called
     * only if the parser is going to return control to the main loop. This
     * means that the implementation may report directly from the parser buffer
     * without copying characters to a temporary buffer.
     *
     * @exception Exception is parser specific exception raised by the
     * implementation.
     */
    protected abstract void bflash()
            throws Exception;
2642
    /**
     * Reports white space characters and empties the parser's buffer. This
     * method is called only if the parser is going to return control to the
     * main loop. This means that the implementation may use the parser buffer
     * to report white space without copying characters to a temporary buffer.
     *
     * @exception Exception is parser specific exception raised by the
     * implementation.
     */
    protected abstract void bflash_ws()
            throws Exception;
2651
2652    /**
2653     * Appends a character to parser's buffer with normalization.
2654     *
2655     * @param ch The character to append to the buffer.
2656     * @param mode The normalization mode.
2657     */
2658    private void bappend(char ch, char mode) {
2659        //              This implements attribute value normalization as
2660        //              described in the XML specification [#3.3.3].
2661        switch (mode) {
2662            case 'i':  // non CDATA normalization
2663                switch (ch) {
2664                    case ' ':
2665                    case '\n':
2666                    case '\r':
2667                    case '\t':
2668                        if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
2669                            bappend(' ');
2670                        }
2671                        return;
2672
2673                    default:
2674                        break;
2675                }
2676                break;
2677
2678            case 'c':  // CDATA normalization
2679                switch (ch) {
2680                    case '\n':
2681                    case '\r':
2682                    case '\t':
2683                        ch = ' ';
2684                        break;
2685
2686                    default:
2687                        break;
2688                }
2689                break;
2690
2691            default:  // no normalization
2692                break;
2693        }
2694        mBuffIdx++;
2695        if (mBuffIdx < mBuff.length) {
2696            mBuff[mBuffIdx] = ch;
2697        } else {
2698            mBuffIdx--;
2699            bappend(ch);
2700        }
2701    }
2702
2703    /**
2704     * Appends a character to parser's buffer.
2705     *
2706     * @param ch The character to append to the buffer.
2707     */
2708    private void bappend(char ch) {
2709        try {
2710            mBuff[++mBuffIdx] = ch;
2711        } catch (Exception exp) {
2712            //          Double the buffer size
2713            char buff[] = new char[mBuff.length << 1];
2714            System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2715            mBuff = buff;
2716            mBuff[mBuffIdx] = ch;
2717        }
2718    }
2719
2720    /**
2721     * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2722     * parser's buffer (mBuff).
2723     *
2724     * @param cidx The character buffer (mChars) start index.
2725     * @param bidx The parser buffer (mBuff) start index.
2726     */
2727    private void bcopy(int cidx, int bidx) {
2728        int length = mChIdx - cidx;
2729        if ((bidx + length + 1) >= mBuff.length) {
2730            //          Expand the buffer
2731            char buff[] = new char[mBuff.length + length];
2732            System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2733            mBuff = buff;
2734        }
2735        System.arraycopy(mChars, cidx, mBuff, bidx, length);
2736        mBuffIdx += length;
2737    }
2738
2739    /**
2740     * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2741     * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe
2742     * 0x100 is a built-in entity replacement character.
2743     *
2744     * @param ch the next character of an entity name.
2745     */
2746    @SuppressWarnings("fallthrough")
2747    private void eappend(char ch) {
2748        switch (mESt) {
2749            case 0x100:  // "l" or "g" or "a" or "q"
2750                switch (ch) {
2751                    case 'l':
2752                        mESt = 0x101;
2753                        break;
2754                    case 'g':
2755                        mESt = 0x102;
2756                        break;
2757                    case 'a':
2758                        mESt = 0x103;
2759                        break;
2760                    case 'q':
2761                        mESt = 0x107;
2762                        break;
2763                    default:
2764                        mESt = 0x200;
2765                        break;
2766                }
2767                break;
2768
2769            case 0x101:  // "lt"
2770                mESt = (ch == 't') ? '<' : (char) 0x200;
2771                break;
2772
2773            case 0x102:  // "gt"
2774                mESt = (ch == 't') ? '>' : (char) 0x200;
2775                break;
2776
2777            case 0x103:  // "am" or "ap"
2778                switch (ch) {
2779                    case 'm':
2780                        mESt = 0x104;
2781                        break;
2782                    case 'p':
2783                        mESt = 0x105;
2784                        break;
2785                    default:
2786                        mESt = 0x200;
2787                        break;
2788                }
2789                break;
2790
2791            case 0x104:  // "amp"
2792                mESt = (ch == 'p') ? '&' : (char) 0x200;
2793                break;
2794
2795            case 0x105:  // "apo"
2796                mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2797                break;
2798
2799            case 0x106:  // "apos"
2800                mESt = (ch == 's') ? '\'' : (char) 0x200;
2801                break;
2802
2803            case 0x107:  // "qu"
2804                mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2805                break;
2806
2807            case 0x108:  // "quo"
2808                mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2809                break;
2810
2811            case 0x109:  // "quot"
2812                mESt = (ch == 't') ? '\"' : (char) 0x200;
2813                break;
2814
2815            case '<':   // "lt"
2816            case '>':   // "gt"
2817            case '&':   // "amp"
2818            case '\'':  // "apos"
2819            case '\"':  // "quot"
2820                mESt = 0x200;
2821            default:
2822                break;
2823        }
2824    }
2825
2826    /**
2827     * Sets up a new input source on the top of the input stack. Note, the first
2828     * byte returned by the entity's byte stream has to be the first byte in the
2829     * entity. However, the parser does not expect the byte order mask in both
2830     * cases when encoding is provided by the input source.
2831     *
2832     * @param is A new input source to set up.
2833     * @exception IOException If any IO errors occur.
2834     * @exception Exception is parser specific exception form panic method.
2835     */
2836    protected void setinp(InputSource is)
2837            throws Exception {
2838        Reader reader = null;
2839        mChIdx = 0;
2840        mChLen = 0;
2841        mChars = mInp.chars;
2842        mInp.src = null;
2843        if (mPh < PH_DOC_START) {
2844            mIsSAlone = false;  // default [#2.9]
2845        }
2846        mIsSAloneSet = false;
2847        if (is.getCharacterStream() != null) {
2848            //          Ignore encoding in the xml text decl.
2849            reader = is.getCharacterStream();
2850            xml(reader);
2851        } else if (is.getByteStream() != null) {
2852            String expenc;
2853            if (is.getEncoding() != null) {
2854                //              Ignore encoding in the xml text decl.
2855                expenc = is.getEncoding().toUpperCase();
2856                if (expenc.equals("UTF-16")) {
2857                    reader = bom(is.getByteStream(), 'U');  // UTF-16 [#4.3.3]
2858                } else {
2859                    reader = enc(expenc, is.getByteStream());
2860                }
2861                xml(reader);
2862            } else {
2863                //              Get encoding from BOM or the xml text decl.
2864                reader = bom(is.getByteStream(), ' ');
2865                /**
2866                 * [#4.3.3] requires BOM for UTF-16, however, it's not uncommon
2867                 * that it may be missing. A mature technique exists in Xerces
2868                 * to further check for possible UTF-16 encoding
2869                 */
2870                if (reader == null) {
2871                    reader = utf16(is.getByteStream());
2872                }
2873
2874                if (reader == null) {
2875                    //          Encoding is defined by the xml text decl.
2876                    reader = enc("UTF-8", is.getByteStream());
2877                    expenc = xml(reader);
2878                    if (!expenc.equals("UTF-8")) {
2879                        if (expenc.startsWith("UTF-16")) {
2880                            panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
2881                        }
2882                        reader = enc(expenc, is.getByteStream());
2883                    }
2884                } else {
2885                    //          Encoding is defined by the BOM.
2886                    xml(reader);
2887                }
2888            }
2889        } else {
2890            //          There is no support for public/system identifiers.
2891            panic(FAULT);
2892        }
2893        mInp.src = reader;
2894        mInp.pubid = is.getPublicId();
2895        mInp.sysid = is.getSystemId();
2896    }
2897
2898    /**
2899     * Determines the entity encoding.
2900     *
2901     * This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the
2902     * first byte returned by the entity's byte stream has to be the first byte
2903     * in the entity. Also, there is no support for UCS-4.
2904     *
2905     * @param is A byte stream of the entity.
2906     * @param hint An encoding hint, character U means UTF-16.
2907     * @return a reader constructed from the BOM or UTF-8 by default.
2908     * @exception Exception is parser specific exception form panic method.
2909     * @exception IOException
2910     */
2911    private Reader bom(InputStream is, char hint)
2912            throws Exception {
2913        int val = is.read();
2914        switch (val) {
2915            case 0xef:     // UTF-8
2916                if (hint == 'U') // must be UTF-16
2917                {
2918                    panic(FAULT);
2919                }
2920                if (is.read() != 0xbb) {
2921                    panic(FAULT);
2922                }
2923                if (is.read() != 0xbf) {
2924                    panic(FAULT);
2925                }
2926                return new ReaderUTF8(is);
2927
2928            case 0xfe:     // UTF-16, big-endian
2929                if (is.read() != 0xff) {
2930                    panic(FAULT);
2931                }
2932                return new ReaderUTF16(is, 'b');
2933
2934            case 0xff:     // UTF-16, little-endian
2935                if (is.read() != 0xfe) {
2936                    panic(FAULT);
2937                }
2938                return new ReaderUTF16(is, 'l');
2939
2940            case -1:
2941                mChars[mChIdx++] = EOS;
2942                return new ReaderUTF8(is);
2943
2944            default:
2945                if (hint == 'U') // must be UTF-16
2946                {
2947                    panic(FAULT);
2948                }
2949                //              Read the rest of UTF-8 character
2950                switch (val & 0xf0) {
2951                    case 0xc0:
2952                    case 0xd0:
2953                        mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f));
2954                        break;
2955
2956                    case 0xe0:
2957                        mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2958                                | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2959                        break;
2960
2961                    case 0xf0:  // UCS-4 character
2962                        throw new UnsupportedEncodingException();
2963
2964                    default:
2965                        mChars[mChIdx++] = (char) val;
2966                        break;
2967                }
2968                return null;
2969        }
2970    }
2971
2972
2973    /**
2974     * Using a mature technique from Xerces, this method checks further after
2975     * the bom method above to see if the encoding is UTF-16
2976     *
2977     * @param is A byte stream of the entity.
2978     * @return a reader, may be null
2979     * @exception Exception is parser specific exception form panic method.
2980     * @exception IOException
2981     */
2982    private Reader utf16(InputStream is)
2983            throws Exception {
2984        if (mChIdx != 0) {
2985            //The bom method has read ONE byte into the buffer.
2986            byte b0 = (byte)mChars[0];
2987            if (b0 == 0x00 || b0 == 0x3C) {
2988                int b1 = is.read();
2989                int b2 = is.read();
2990                int b3 = is.read();
2991                if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2992                    // UTF-16, big-endian, no BOM
2993                    mChars[0] = (char)(b1);
2994                    mChars[mChIdx++] = (char)(b3);
2995                    return new ReaderUTF16(is, 'b');
2996                } else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2997                    // UTF-16, little-endian, no BOM
2998                    mChars[0] = (char)(b0);
2999                    mChars[mChIdx++] = (char)(b2);
3000                    return new ReaderUTF16(is, 'l');
3001                } else {
3002                    /**not every InputStream supports reset, so we have to remember
3003                     * the state for further parsing
3004                    **/
3005                    mChars[0] = (char)(b0);
3006                    mChars[mChIdx++] = (char)(b1);
3007                    mChars[mChIdx++] = (char)(b2);
3008                    mChars[mChIdx++] = (char)(b3);
3009                }
3010
3011            }
3012        }
3013        return null;
3014    }
3015    /**
3016     * Parses the xml text declaration.
3017     *
3018     * This method gets encoding from the xml text declaration [#4.3.1] if any.
3019     * The method assumes the buffer (mChars) is big enough to accommodate whole
3020     * xml text declaration.
3021     *
3022     * @param reader is entity reader.
3023     * @return The xml text declaration encoding or default UTF-8 encoding.
3024     * @exception Exception is parser specific exception form panic method.
3025     * @exception IOException
3026     */
3027    private String xml(Reader reader)
3028            throws Exception {
3029        String str = null;
3030        String enc = "UTF-8";
3031        char ch;
3032        int val;
3033        short st = 0;
3034        int byteRead =  mChIdx; //number of bytes read prior to entering this method
3035
3036        while (st >= 0 && mChIdx < mChars.length) {
3037            if (st < byteRead) {
3038                ch = mChars[st];
3039            } else {
3040                ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3041                mChars[mChIdx++] = ch;
3042            }
3043
3044            switch (st) {
3045                case 0:     // read '<' of xml declaration
3046                    switch (ch) {
3047                        case '<':
3048                            st = 1;
3049                            break;
3050
3051                        case 0xfeff:    // the byte order mask
3052                            ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3053                            mChars[mChIdx - 1] = ch;
3054                            st = (short) ((ch == '<') ? 1 : -1);
3055                            break;
3056
3057                        default:
3058                            st = -1;
3059                            break;
3060                    }
3061                    break;
3062
3063                case 1:     // read '?' of xml declaration [#4.3.1]
3064                    st = (short) ((ch == '?') ? 2 : -1);
3065                    break;
3066
3067                case 2:     // read 'x' of xml declaration [#4.3.1]
3068                    st = (short) ((ch == 'x') ? 3 : -1);
3069                    break;
3070
3071                case 3:     // read 'm' of xml declaration [#4.3.1]
3072                    st = (short) ((ch == 'm') ? 4 : -1);
3073                    break;
3074
3075                case 4:     // read 'l' of xml declaration [#4.3.1]
3076                    st = (short) ((ch == 'l') ? 5 : -1);
3077                    break;
3078
3079                case 5:     // read white space after 'xml'
3080                    switch (ch) {
3081                        case ' ':
3082                        case '\t':
3083                        case '\r':
3084                        case '\n':
3085                            st = 6;
3086                            break;
3087
3088                        default:
3089                            st = -1;
3090                            break;
3091                    }
3092                    break;
3093
3094                case 6:     // read content of xml declaration
3095                    switch (ch) {
3096                        case '?':
3097                            st = 7;
3098                            break;
3099
3100                        case EOS:
3101                            st = -2;
3102                            break;
3103
3104                        default:
3105                            break;
3106                    }
3107                    break;
3108
3109                case 7:     // read '>' after '?' of xml declaration
3110                    switch (ch) {
3111                        case '>':
3112                        case EOS:
3113                            st = -2;
3114                            break;
3115
3116                        default:
3117                            st = 6;
3118                            break;
3119                    }
3120                    break;
3121
3122                default:
3123                    panic(FAULT);
3124                    break;
3125            }
3126        }
3127        mChLen = mChIdx;
3128        mChIdx = 0;
3129        //              If there is no xml text declaration, the encoding is default.
3130        if (st == -1) {
3131            return enc;
3132        }
3133        mChIdx = 5;  // the first white space after "<?xml"
3134        //              Parse the xml text declaration
3135        for (st = 0; st >= 0;) {
3136            ch = getch();
3137            switch (st) {
3138                case 0:     // skip spaces after the xml declaration name
3139                    if (chtyp(ch) != ' ') {
3140                        bkch();
3141                        st = 1;
3142                    }
3143                    break;
3144
3145                case 1:     // read xml declaration version
3146                case 2:     // read xml declaration encoding or standalone
3147                case 3:     // read xml declaration standalone
3148                    switch (chtyp(ch)) {
3149                        case 'a':
3150                        case 'A':
3151                        case '_':
3152                            bkch();
3153                            str = name(false).toLowerCase();
3154                            if ("version".equals(str) == true) {
3155                                if (st != 1) {
3156                                    panic(FAULT);
3157                                }
3158                                if ("1.0".equals(eqstr('=')) != true) {
3159                                    panic(FAULT);
3160                                }
3161                                mInp.xmlver = 0x0100;
3162                                st = 2;
3163                            } else if ("encoding".equals(str) == true) {
3164                                if (st != 2) {
3165                                    panic(FAULT);
3166                                }
3167                                mInp.xmlenc = eqstr('=').toUpperCase();
3168                                enc = mInp.xmlenc;
3169                                st = 3;
3170                            } else if ("standalone".equals(str) == true) {
3171                                if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
3172                                {
3173                                    panic(FAULT);
3174                                }
3175                                str = eqstr('=').toLowerCase();
3176                                //              Check the 'standalone' value and use it [#5.1]
3177                                if (str.equals("yes") == true) {
3178                                    mIsSAlone = true;
3179                                } else if (str.equals("no") == true) {
3180                                    mIsSAlone = false;
3181                                } else {
3182                                    panic(FAULT);
3183                                }
3184                                mIsSAloneSet = true;
3185                                st = 4;
3186                            } else {
3187                                panic(FAULT);
3188                            }
3189                            break;
3190
3191                        case ' ':
3192                            break;
3193
3194                        case '?':
3195                            if (st == 1) {
3196                                panic(FAULT);
3197                            }
3198                            bkch();
3199                            st = 4;
3200                            break;
3201
3202                        default:
3203                            panic(FAULT);
3204                    }
3205                    break;
3206
3207                case 4:     // end of xml declaration
3208                    switch (chtyp(ch)) {
3209                        case '?':
3210                            if (getch() != '>') {
3211                                panic(FAULT);
3212                            }
3213                            if (mPh <= PH_DOC_START) {
3214                                mPh = PH_MISC_DTD;  // misc before DTD
3215                            }
3216                            st = -1;
3217                            break;
3218
3219                        case ' ':
3220                            break;
3221
3222                        default:
3223                            panic(FAULT);
3224                    }
3225                    break;
3226
3227                default:
3228                    panic(FAULT);
3229            }
3230        }
3231        return enc;
3232    }
3233
3234    /**
3235     * Sets up the document reader.
3236     *
3237     * @param name an encoding name.
3238     * @param is the document byte input stream.
3239     * @return a reader constructed from encoding name and input stream.
3240     * @exception UnsupportedEncodingException
3241     */
3242    private Reader enc(String name, InputStream is)
3243            throws UnsupportedEncodingException {
3244        //              DO NOT CLOSE current reader if any!
3245        if (name.equals("UTF-8")) {
3246            return new ReaderUTF8(is);
3247        } else if (name.equals("UTF-16LE")) {
3248            return new ReaderUTF16(is, 'l');
3249        } else if (name.equals("UTF-16BE")) {
3250            return new ReaderUTF16(is, 'b');
3251        } else {
3252            return new InputStreamReader(is, name);
3253        }
3254    }
3255
3256    /**
3257     * Sets up current input on the top of the input stack.
3258     *
3259     * @param inp A new input to set up.
3260     */
3261    protected void push(Input inp) {
3262        mInp.chLen = mChLen;
3263        mInp.chIdx = mChIdx;
3264        inp.next = mInp;
3265        mInp = inp;
3266        mChars = inp.chars;
3267        mChLen = inp.chLen;
3268        mChIdx = inp.chIdx;
3269    }
3270
3271    /**
3272     * Restores previous input on the top of the input stack.
3273     */
3274    protected void pop() {
3275        if (mInp.src != null) {
3276            try {
3277                mInp.src.close();
3278            } catch (IOException ioe) {
3279            }
3280            mInp.src = null;
3281        }
3282        mInp = mInp.next;
3283        if (mInp != null) {
3284            mChars = mInp.chars;
3285            mChLen = mInp.chLen;
3286            mChIdx = mInp.chIdx;
3287        } else {
3288            mChars = null;
3289            mChLen = 0;
3290            mChIdx = 0;
3291        }
3292    }
3293
3294    /**
3295     * Maps a character to its type.
3296     *
3297     * Possible character type values are:
3298     * <ul>
3299     * <li>' ' - for any kind of whitespace character;</li>
3300     * <li>'a' - for any lower case alphabetical character value;</li>
3301     * <li>'A' - for any upper case alphabetical character value;</li>
3302     * <li>'d' - for any decimal digit character value;</li>
3303     * <li>'z' - for any character less than ' ' except '\t', '\n', '\r';</li>
3304     * <li>'X' - for any not ASCII character;</li>
3305     * <li>'Z' - for EOS character.</li>
3306     * </ul>
3307     * An ASCII (7 bit) character which does not fall in any category
3308     * listed above is mapped to itself.
3309     *
3310     * @param ch The character to map.
3311     * @return The type of character.
3312     */
3313    protected char chtyp(char ch) {
3314        if (ch < 0x80) {
3315            return (char) asctyp[ch];
3316        }
3317        return (ch != EOS) ? 'X' : 'Z';
3318    }
3319
3320    /**
3321     * Retrives the next character in the document.
3322     *
3323     * @return The next character in the document.
3324     */
3325    protected char getch()
3326            throws IOException {
3327        if (mChIdx >= mChLen) {
3328            if (mInp.src == null) {
3329                pop();  // remove internal entity
3330                return getch();
3331            }
3332            //          Read new portion of the document characters
3333            int Num = mInp.src.read(mChars, 0, mChars.length);
3334            if (Num < 0) {
3335                if (mInp != mDoc) {
3336                    pop();  // restore the previous input
3337                    return getch();
3338                } else {
3339                    mChars[0] = EOS;
3340                    mChLen = 1;
3341                }
3342            } else {
3343                mChLen = Num;
3344            }
3345            mChIdx = 0;
3346        }
3347        return mChars[mChIdx++];
3348    }
3349
3350    /**
3351     * Puts back the last read character.
3352     *
3353     * This method <strong>MUST NOT</strong> be called more then once after each
3354     * call of {@link #getch getch} method.
3355     */
3356    protected void bkch()
3357            throws Exception {
3358        if (mChIdx <= 0) {
3359            panic(FAULT);
3360        }
3361        mChIdx--;
3362    }
3363
3364    /**
3365     * Sets the current character.
3366     *
3367     * @param ch The character to set.
3368     */
3369    protected void setch(char ch) {
3370        mChars[mChIdx] = ch;
3371    }
3372
3373    /**
3374     * Finds a pair in the pair chain by a qualified name.
3375     *
3376     * @param chain The first element of the chain of pairs.
3377     * @param qname The qualified name.
3378     * @return A pair with the specified qualified name or null.
3379     */
3380    protected Pair find(Pair chain, char[] qname) {
3381        for (Pair pair = chain; pair != null; pair = pair.next) {
3382            if (pair.eqname(qname) == true) {
3383                return pair;
3384            }
3385        }
3386        return null;
3387    }
3388
3389    /**
3390     * Provedes an instance of a pair.
3391     *
3392     * @param next The reference to a next pair.
3393     * @return An instance of a pair.
3394     */
3395    protected Pair pair(Pair next) {
3396        Pair pair;
3397
3398        if (mDltd != null) {
3399            pair = mDltd;
3400            mDltd = pair.next;
3401        } else {
3402            pair = new Pair();
3403        }
3404        pair.next = next;
3405
3406        return pair;
3407    }
3408
3409    /**
3410     * Deletes an instance of a pair.
3411     *
3412     * @param pair The pair to delete.
3413     * @return A reference to the next pair in a chain.
3414     */
3415    protected Pair del(Pair pair) {
3416        Pair next = pair.next;
3417
3418        pair.name = null;
3419        pair.value = null;
3420        pair.chars = null;
3421        pair.list = null;
3422        pair.next = mDltd;
3423        mDltd = pair;
3424
3425        return next;
3426    }
3427}
3428