1/*
2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
3 */
4/*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements.  See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21package com.sun.org.apache.xerces.internal.impl;
22
23import java.io.IOException;
24
25import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
26import com.sun.org.apache.xerces.internal.util.XML11Char;
27import com.sun.org.apache.xerces.internal.util.XMLChar;
28import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
29import com.sun.org.apache.xerces.internal.xni.XMLString;
30import com.sun.org.apache.xerces.internal.xni.XNIException;
31
32/**
33 * This class is responsible for scanning XML document structure
34 * and content. The scanner acts as the source for the document
35 * information which is communicated to the document handler.
36 * <p>
37 * This component requires the following features and properties from the
38 * component manager that uses it:
39 * <ul>
40 *  <li>http://xml.org/sax/features/namespaces</li>
41 *  <li>http://xml.org/sax/features/validation</li>
42 *  <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
43 *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
44 *  <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
45 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
46 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
47 *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
48 *  <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
49 * </ul>
50 *
51 * @xerces.internal
52 *
53 * @author Glenn Marcy, IBM
54 * @author Andy Clark, IBM
55 * @author Arnaud  Le Hors, IBM
56 * @author Eric Ye, IBM
57 *
58 */
59public class XML11DocumentScannerImpl
60    extends XMLDocumentScannerImpl {
61
62
    /**
     * Scratch buffer. {@code scanAttributeValue} accumulates the normalized
     * attribute value in it and {@code scanPubidLiteral} builds the
     * normalized public identifier in it; both clear it before use, so
     * nothing stored here survives a call to either method.
     */
    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
    /**
     * Scratch buffer in which {@code scanAttributeValue} collects the
     * non-normalized attribute value.
     */
    private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
    /**
     * Scratch buffer that {@code scanAttributeValue} hands to
     * {@code scanSurrogates} while reading a surrogate pair.
     */
    private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
67
68    //
69    // Constructors
70    //
71
72    /** Default constructor. */
73    public XML11DocumentScannerImpl() {super();} // <init>()
74
75    //
76    // overridden methods
77    //
78
79    // XMLDocumentFragmentImpl methods
80
81    /**
82     * Scans element content.
83     *
84     * @return Returns the next character on the stream.
85     */
86    protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
87
88        fTempString.length = 0;
89        int c = fEntityScanner.scanContent(fTempString);
90        content.append(fTempString);
91
92        if (c == '\r' || c == 0x85 || c == 0x2028) {
93            // happens when there is the character reference &#13;
94            // but scanContent doesn't do entity expansions...
95            // is this *really* necessary???  - NG
96            fEntityScanner.scanChar(null);
97            content.append((char)c);
98            c = -1;
99        }
100        /*if (fDocumentHandler != null && content.length > 0) {
101            fDocumentHandler.characters(content, null);
102        } */
103
104        if (c == ']') {
105            content.append((char)fEntityScanner.scanChar(null));
106            // remember where we are in case we get an endEntity before we
107            // could flush the buffer out - this happens when we're parsing an
108            // entity which ends with a ]
109            fInScanContent = true;
110            //
111            // We work on a single character basis to handle cases such as:
112            // ']]]>' which we might otherwise miss.
113            //
114            if (fEntityScanner.skipChar(']', null)) {
115                content.append(']');
116                while (fEntityScanner.skipChar(']', null)) {
117                    content.append(']');
118                }
119                if (fEntityScanner.skipChar('>', null)) {
120                    reportFatalError("CDEndInContent", null);
121                }
122            }
123            /*if (fDocumentHandler != null && fStringBuffer.length != 0) {
124                fDocumentHandler.characters(fStringBuffer, null);
125            }*/
126            fInScanContent = false;
127            c = -1;
128        }
129        return c;
130
131    } // scanContent():int
132
    /**
     * Scans an attribute value and normalizes whitespace converting all
     * whitespace characters to space characters.
     *
     * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
     *
     * <strong>Note:</strong> This method overwrites fStringBuffer,
     * fStringBuffer2 and fStringBuffer3; anything in them at the time of
     * calling is lost. On the slow path {@code value} ends up referring to
     * fStringBuffer, and on every path except the early return
     * {@code nonNormalizedValue} ends up referring to fStringBuffer2.
     *
     * @param value The XMLString to fill in with the value.
     * @param nonNormalizedValue The XMLString to fill in with the
     *                           non-normalized value.
     * @param atName The name of the attribute being parsed (for error msgs).
     * @param checkEntities true if undeclared entities should be reported as VC violation,
     *                      false if undeclared entities should be reported as WFC violation.
     * @param eleName The name of element to which this attribute belongs.
     * @param isNSURI The flag indicating whether the content is a namespace URI
     *
     * @return true if the non-normalized and normalized value are the same
     *
     * @throws IOException  declared by this method; presumably raised by the
     *                      underlying entity scanner - confirm there.
     * @throws XNIException declared by this method; presumably raised when a
     *                      fatal error is reported - confirm in reportFatalError.
     **/
    protected boolean scanAttributeValue(XMLString value,
                                      XMLString nonNormalizedValue,
                                      String atName,
                                      boolean checkEntities,String eleName, boolean isNSURI)
        throws IOException, XNIException
    {
        // quote
        // The opening quote is only peeked at first so that a wrong
        // character can be reported before anything is consumed.
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
        }

        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // Depth at which the attribute started. Text read while
        // fEntityDepth differs from this is never copied into the
        // non-normalized buffer (fStringBuffer2).
        // NOTE(review): presumably fEntityDepth changes while an entity
        // started below via startEntity is being read - confirm in superclass.
        int entityDepth = fEntityDepth;

        // c is compared below against the quote, '&', '<', etc., i.e. it is
        // treated as the character at which scanLiteral stopped.
        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                               + value.toString() + "\"");
        }

        // Fast path: the whole value was read in one go and contains no
        // whitespace that would be rewritten. When c != quote the && short-
        // circuits and fromIndex stays 0, so normalization below starts at
        // the beginning of the value.
        int fromIndex = 0;
        if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
            /** Both the non-normalized and normalized attribute values are equal. **/
            nonNormalizedValue.setValues(value);
            int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
            if (cquote != quote) {
                reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
            }
            return true;
        }
        // Keep a copy of the raw text before value is normalized in place.
        fStringBuffer2.clear();
        fStringBuffer2.append(value);
        normalizeWhitespace(value, fromIndex);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                               + value.toString() + "\"");
        }
        if (c != quote) {
            // Slow path: the literal was interrupted by a character that
            // needs special handling. fStringBuffer accumulates the
            // normalized value, fStringBuffer2 the non-normalized one.
            fScanningAttribute = true;
            fStringBuffer.clear();
            do {
                // Append the (already normalized) segment read so far.
                fStringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                                       + fStringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        // Character reference: both buffers are handed to
                        // scanCharReferenceValue.
                        if (entityDepth == fEntityDepth) {
                            fStringBuffer2.append('#');
                        }
                        int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                                   + fStringBuffer.toString()
                                                   + "\"");
                            }
                        }
                    }
                    else {
                        // General entity reference: &name;
                        String entityName = fEntityScanner.scanName(NameType.REFERENCE);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        }
                        else if (entityDepth == fEntityDepth) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                             new Object []{entityName});
                        }
                        else if (entityDepth == fEntityDepth) {
                            fStringBuffer2.append(';');
                        }
                        // resolveCharacter is given the name and the
                        // normalized buffer; when it returns true only the
                        // entity limit is checked.
                        if (resolveCharacter(entityName, fStringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        }
                        else {
                            if (fEntityManager.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                                 new Object[] { entityName });
                            }
                            else {
                                if (!fEntityManager.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    // With checkEntities set the problem is
                                    // only reported (as an error) when
                                    // validating; otherwise it is fatal.
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                                                       "EntityNotDeclared",
                                                                       new Object[]{entityName},
                                                                       XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    }
                                    else {
                                        reportFatalError("EntityNotDeclared",
                                                         new Object[]{entityName});
                                    }
                                }
                                // Reached whether or not the entity was
                                // found to be declared above.
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                }
                else if (c == '<') {
                    // Report, then consume the '<'; it is recorded only in
                    // the non-normalized buffer.
                    reportFatalError("LessthanInAttValue",
                                     new Object[] { eleName, atName });
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append((char)c);
                    }
                }
                else if (c == '%' || c == ']') {
                    // These characters are consumed and copied through as-is.
                    fEntityScanner.scanChar(null);
                    fStringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                           + fStringBuffer.toString() + "\"");
                    }
                }
                // note that none of these characters should ever get through
                // XML11EntityScanner.  Not sure why
                // this check was originally necessary.  - NG
                else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                    // Normalized value gets a space; the non-normalized
                    // value always gets '\n', whichever line break was read.
                    fEntityScanner.scanChar(null);
                    fStringBuffer.append(' ');
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append('\n');
                    }
                }
                else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // Surrogate pair: read into the scratch buffer first and
                    // copy only if scanSurrogates reports success.
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        fStringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                               + fStringBuffer.toString()
                                               + "\"");
                        }
                    }
                }
                else if (c != -1 && isInvalidLiteral(c)) {
                    // Report, then consume the character; it is recorded
                    // only in the non-normalized buffer.
                    reportFatalError("InvalidCharInAttValue",
                                     new Object[] {eleName, atName, Integer.toString(c, 16)});
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append((char)c);
                    }
                }
                // Read the next segment; the raw copy must be taken before
                // value is normalized in place.
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append(value);
                }
                normalizeWhitespace(value);
                // Stop only on a quote seen at the depth the attribute
                // started at.
            } while (c != quote || entityDepth != fEntityDepth);
            // Append the last segment and expose the accumulated result.
            fStringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                                   + fStringBuffer.toString() + "\"");
            }
            value.setValues(fStringBuffer);
            fScanningAttribute = false;
        }
        nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(null);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
        }
        // Compare the raw text with the normalized text character by character.
        return nonNormalizedValue.equals(value.ch, value.offset, value.length);
    } // scanAttributeValue()
336
337    //
338    // XMLScanner methods
339    //
340    // NOTE:  this is a carbon copy of the code in XML11DTDScannerImpl;
341    // we need to override these methods in both places.
342    // this needs to be refactored!!!  - NG
343    /**
344     * Scans public ID literal.
345     *
346     * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
347     * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
348     *
349     * The returned string is normalized according to the following rule,
350     * from http://www.w3.org/TR/REC-xml#dt-pubid:
351     *
352     * Before a match is attempted, all strings of white space in the public
353     * identifier must be normalized to single space characters (#x20), and
354     * leading and trailing white space must be removed.
355     *
356     * @param literal The string to fill in with the public ID literal.
357     * @return True on success.
358     *
359     * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
360     * the time of calling is lost.
361     */
362    protected boolean scanPubidLiteral(XMLString literal)
363        throws IOException, XNIException
364    {
365        int quote = fEntityScanner.scanChar(null);
366        if (quote != '\'' && quote != '"') {
367            reportFatalError("QuoteRequiredInPublicID", null);
368            return false;
369        }
370
371        fStringBuffer.clear();
372        // skip leading whitespace
373        boolean skipSpace = true;
374        boolean dataok = true;
375        while (true) {
376            int c = fEntityScanner.scanChar(null);
377            // REVISIT:  none of these except \n and 0x20 should make it past the entity scanner
378            if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
379                if (!skipSpace) {
380                    // take the first whitespace as a space and skip the others
381                    fStringBuffer.append(' ');
382                    skipSpace = true;
383                }
384            }
385            else if (c == quote) {
386                if (skipSpace) {
387                    // if we finished on a space let's trim it
388                    fStringBuffer.length--;
389                }
390                literal.setValues(fStringBuffer);
391                break;
392            }
393            else if (XMLChar.isPubid(c)) {
394                fStringBuffer.append((char)c);
395                skipSpace = false;
396            }
397            else if (c == -1) {
398                reportFatalError("PublicIDUnterminated", null);
399                return false;
400            }
401            else {
402                dataok = false;
403                reportFatalError("InvalidCharInPublicID",
404                                 new Object[]{Integer.toHexString(c)});
405            }
406        }
407        return dataok;
408   }
409
410    /**
411     * Normalize whitespace in an XMLString converting all whitespace
412     * characters to space characters.
413     */
414    protected void normalizeWhitespace(XMLString value) {
415        int end = value.offset + value.length;
416            for (int i = value.offset; i < end; ++i) {
417           int c = value.ch[i];
418           if (XMLChar.isSpace(c)) {
419               value.ch[i] = ' ';
420           }
421       }
422    }
423
424    /**
425     * Normalize whitespace in an XMLString converting all whitespace
426     * characters to space characters.
427     */
428    protected void normalizeWhitespace(XMLString value, int fromIndex) {
429        int end = value.offset + value.length;
430        for (int i = value.offset + fromIndex; i < end; ++i) {
431            int c = value.ch[i];
432            if (XMLChar.isSpace(c)) {
433                value.ch[i] = ' ';
434            }
435        }
436    }
437
438    /**
439     * Checks whether this string would be unchanged by normalization.
440     *
441     * @return -1 if the value would be unchanged by normalization,
442     * otherwise the index of the first whitespace character which
443     * would be transformed.
444     */
445    protected int isUnchangedByNormalization(XMLString value) {
446        int end = value.offset + value.length;
447        for (int i = value.offset; i < end; ++i) {
448            int c = value.ch[i];
449            if (XMLChar.isSpace(c)) {
450                return i - value.offset;
451            }
452        }
453        return -1;
454    }
455
456    // returns true if the given character is not
457    // valid with respect to the version of
458    // XML understood by this scanner.
459    protected boolean isInvalid(int value) {
460        return (XML11Char.isXML11Invalid(value));
461    } // isInvalid(int):  boolean
462
463    // returns true if the given character is not
464    // valid or may not be used outside a character reference
465    // with respect to the version of XML understood by this scanner.
466    protected boolean isInvalidLiteral(int value) {
467        return (!XML11Char.isXML11ValidLiteral(value));
468    } // isInvalidLiteral(int):  boolean
469
470    // returns true if the given character is
471    // a valid nameChar with respect to the version of
472    // XML understood by this scanner.
473    protected boolean isValidNameChar(int value) {
474        return (XML11Char.isXML11Name(value));
475    } // isValidNameChar(int):  boolean
476
477    // returns true if the given character is
478    // a valid nameStartChar with respect to the version of
479    // XML understood by this scanner.
480    protected boolean isValidNameStartChar(int value) {
481        return (XML11Char.isXML11NameStart(value));
482    } // isValidNameStartChar(int):  boolean
483
484    // returns true if the given character is
485    // a valid NCName character with respect to the version of
486    // XML understood by this scanner.
487    protected boolean isValidNCName(int value) {
488        return (XML11Char.isXML11NCName(value));
489    } // isValidNCName(int):  boolean
490
491    // returns true if the given character is
492    // a valid high surrogate for a nameStartChar
493    // with respect to the version of XML understood
494    // by this scanner.
495    protected boolean isValidNameStartHighSurrogate(int value) {
496        return XML11Char.isXML11NameHighSurrogate(value);
497    } // isValidNameStartHighSurrogate(int):  boolean
498
499    protected boolean versionSupported(String version) {
500        return (version.equals("1.1") || version.equals("1.0"));
501    } // versionSupported(String):  boolean
502
503    // returns the error message key for unsupported
504    // versions of XML with respect to the version of
505    // XML understood by this scanner.
506    protected String getVersionNotSupportedKey () {
507        return "VersionNotSupported11";
508    } // getVersionNotSupportedKey: String
509
510} // class XML11DocumentScannerImpl
511