1/*
2 * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
3 */
4/*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements.  See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21package com.sun.org.apache.xml.internal.serializer;
22
23import java.io.IOException;
24import java.util.Properties;
25
26import javax.xml.transform.Result;
27
28import org.xml.sax.Attributes;
29import org.xml.sax.SAXException;
30
31import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
32import com.sun.org.apache.xml.internal.serializer.utils.Utils;
33
34/**
35 * This serializer takes a series of SAX or
36 * SAX-like events and writes its output
37 * to the given stream.
38 *
39 * This class is not a public API, it is public
40 * because it is used from another package.
41 *
42 * @xsl.usage internal
43 */
44public final class ToHTMLStream extends ToStream
45{
46
    /** This flag is set while receiving events from the DTD. Starts out false. */
    protected boolean m_inDTD = false;

    /**
     * True if the previous element is a block element.
     * Updated in startElement() from the element's BLOCK flag while indenting,
     * and consulted by shouldIndentForText().
     */
    private boolean m_isprevblock = false;

    /**
     * Map that tells which XML characters should have special treatment, and it
     * provides character to entity name lookup.
     * Shared by all instances; the constructor installs it as m_charInfo.
     */
    private static final CharInfo m_htmlcharInfo =
//        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * A digital search trie for fast, case insensitive lookup of ElemDesc objects.
     * Filled exactly once by the static initializer below and used as the
     * source for each instance's private m_htmlInfo trie.
     */
    static final Trie m_elementFlags = new Trie();

    // Populate the shared element table when the class is initialized.
    static {
        initTagReference(m_elementFlags);
    }
67    static void initTagReference(Trie m_elementFlags) {
68
69        // HTML 4.0 loose DTD
70        m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
71        m_elementFlags.put(
72            "FRAME",
73            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
74        m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
75        m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
76        m_elementFlags.put(
77            "ISINDEX",
78            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
79        m_elementFlags.put(
80            "APPLET",
81            new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
82        m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
83        m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
84        m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
85
86        // HTML 4.0 strict DTD
87        m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
88        m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
89        m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
90        m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
91        m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
92        m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
93        m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
94        m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
95        m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
96        m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
97        m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
98        m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
99        m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
100        m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
101        m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
102        m_elementFlags.put(
103            "SUP",
104            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
105        m_elementFlags.put(
106            "SUB",
107            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
108        m_elementFlags.put(
109            "SPAN",
110            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
111        m_elementFlags.put(
112            "BDO",
113            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
114        m_elementFlags.put(
115            "BR",
116            new ElemDesc(
117                0
118                    | ElemDesc.SPECIAL
119                    | ElemDesc.ASPECIAL
120                    | ElemDesc.EMPTY
121                    | ElemDesc.BLOCK));
122        m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
123        m_elementFlags.put(
124            "ADDRESS",
125            new ElemDesc(
126                0
127                    | ElemDesc.BLOCK
128                    | ElemDesc.BLOCKFORM
129                    | ElemDesc.BLOCKFORMFIELDSET));
130        m_elementFlags.put(
131            "DIV",
132            new ElemDesc(
133                0
134                    | ElemDesc.BLOCK
135                    | ElemDesc.BLOCKFORM
136                    | ElemDesc.BLOCKFORMFIELDSET));
137        m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
138        m_elementFlags.put(
139            "MAP",
140            new ElemDesc(
141                0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
142        m_elementFlags.put(
143            "AREA",
144            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
145        m_elementFlags.put(
146            "LINK",
147            new ElemDesc(
148                0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
149        m_elementFlags.put(
150            "IMG",
151            new ElemDesc(
152                0
153                    | ElemDesc.SPECIAL
154                    | ElemDesc.ASPECIAL
155                    | ElemDesc.EMPTY
156                    | ElemDesc.WHITESPACESENSITIVE));
157        m_elementFlags.put(
158            "OBJECT",
159            new ElemDesc(
160                0
161                    | ElemDesc.SPECIAL
162                    | ElemDesc.ASPECIAL
163                    | ElemDesc.HEADMISC
164                    | ElemDesc.WHITESPACESENSITIVE));
165        m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
166        m_elementFlags.put(
167            "HR",
168            new ElemDesc(
169                0
170                    | ElemDesc.BLOCK
171                    | ElemDesc.BLOCKFORM
172                    | ElemDesc.BLOCKFORMFIELDSET
173                    | ElemDesc.EMPTY));
174        m_elementFlags.put(
175            "P",
176            new ElemDesc(
177                0
178                    | ElemDesc.BLOCK
179                    | ElemDesc.BLOCKFORM
180                    | ElemDesc.BLOCKFORMFIELDSET));
181        m_elementFlags.put(
182            "H1",
183            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
184        m_elementFlags.put(
185            "H2",
186            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
187        m_elementFlags.put(
188            "H3",
189            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
190        m_elementFlags.put(
191            "H4",
192            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
193        m_elementFlags.put(
194            "H5",
195            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
196        m_elementFlags.put(
197            "H6",
198            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
199        m_elementFlags.put(
200            "PRE",
201            new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
202        m_elementFlags.put(
203            "Q",
204            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
205        m_elementFlags.put(
206            "BLOCKQUOTE",
207            new ElemDesc(
208                0
209                    | ElemDesc.BLOCK
210                    | ElemDesc.BLOCKFORM
211                    | ElemDesc.BLOCKFORMFIELDSET));
212        m_elementFlags.put("INS", new ElemDesc(0));
213        m_elementFlags.put("DEL", new ElemDesc(0));
214        m_elementFlags.put(
215            "DL",
216            new ElemDesc(
217                0
218                    | ElemDesc.BLOCK
219                    | ElemDesc.BLOCKFORM
220                    | ElemDesc.BLOCKFORMFIELDSET));
221        m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
222        m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
223        m_elementFlags.put(
224            "OL",
225            new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
226        m_elementFlags.put(
227            "UL",
228            new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
229        m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
230        m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
231        m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
232        m_elementFlags.put(
233            "INPUT",
234            new ElemDesc(
235                0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
236        m_elementFlags.put(
237            "SELECT",
238            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
239        m_elementFlags.put("OPTGROUP", new ElemDesc(0));
240        m_elementFlags.put("OPTION", new ElemDesc(0));
241        m_elementFlags.put(
242            "TEXTAREA",
243            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
244        m_elementFlags.put(
245            "FIELDSET",
246            new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
247        m_elementFlags.put("LEGEND", new ElemDesc(0));
248        m_elementFlags.put(
249            "BUTTON",
250            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
251        m_elementFlags.put(
252            "TABLE",
253            new ElemDesc(
254                0
255                    | ElemDesc.BLOCK
256                    | ElemDesc.BLOCKFORM
257                    | ElemDesc.BLOCKFORMFIELDSET));
258        m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
259        m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
260        m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
261        m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
262        m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
263        m_elementFlags.put(
264            "COL",
265            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
266        m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
267        m_elementFlags.put("TH", new ElemDesc(0));
268        m_elementFlags.put("TD", new ElemDesc(0));
269        m_elementFlags.put(
270            "HEAD",
271            new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
272        m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
273        m_elementFlags.put(
274            "BASE",
275            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
276        m_elementFlags.put(
277            "META",
278            new ElemDesc(
279                0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
280        m_elementFlags.put(
281            "STYLE",
282            new ElemDesc(
283                0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
284        m_elementFlags.put(
285            "SCRIPT",
286            new ElemDesc(
287                0
288                    | ElemDesc.SPECIAL
289                    | ElemDesc.ASPECIAL
290                    | ElemDesc.HEADMISC
291                    | ElemDesc.RAW));
292        m_elementFlags.put(
293            "NOSCRIPT",
294            new ElemDesc(
295                0
296                    | ElemDesc.BLOCK
297                    | ElemDesc.BLOCKFORM
298                    | ElemDesc.BLOCKFORMFIELDSET));
299        m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
300
301        // From "John Ky" <hand@syd.speednet.com.au
302        // Transitional Document Type Definition ()
303        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
304        m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
305
306        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
307        m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
308        m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
309
310        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
311        m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
312
313        // From "John Ky" <hand@syd.speednet.com.au
314        m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
315
316        // HTML 4.0, section 16.5
317        m_elementFlags.put(
318            "IFRAME",
319            new ElemDesc(
320                0
321                    | ElemDesc.BLOCK
322                    | ElemDesc.BLOCKFORM
323                    | ElemDesc.BLOCKFORMFIELDSET));
324
325        // Netscape 4 extension
326        m_elementFlags.put(
327            "LAYER",
328            new ElemDesc(
329                0
330                    | ElemDesc.BLOCK
331                    | ElemDesc.BLOCKFORM
332                    | ElemDesc.BLOCKFORMFIELDSET));
333        // Netscape 4 extension
334        m_elementFlags.put(
335            "ILAYER",
336            new ElemDesc(
337                0
338                    | ElemDesc.BLOCK
339                    | ElemDesc.BLOCKFORM
340                    | ElemDesc.BLOCKFORMFIELDSET));
341
342
343        // NOW FOR ATTRIBUTE INFORMATION . . .
344        ElemDesc elemDesc;
345
346
347        // ----------------------------------------------
348        elemDesc = (ElemDesc) m_elementFlags.get("a");
349        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
350        elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
351
352        // ----------------------------------------------
353        elemDesc = (ElemDesc) m_elementFlags.get("area");
354        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
355        elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
356
357        // ----------------------------------------------
358        elemDesc = (ElemDesc) m_elementFlags.get("base");
359        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
360
361        // ----------------------------------------------
362        elemDesc = (ElemDesc) m_elementFlags.get("button");
363        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
364
365        // ----------------------------------------------
366        elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
367        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
368
369        // ----------------------------------------------
370        elemDesc = (ElemDesc) m_elementFlags.get("del");
371        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
372
373        // ----------------------------------------------
374        elemDesc = (ElemDesc) m_elementFlags.get("dir");
375        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
376
377        // ----------------------------------------------
378
379        elemDesc = (ElemDesc) m_elementFlags.get("div");
380        elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
381        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
382
383        // ----------------------------------------------
384        elemDesc = (ElemDesc) m_elementFlags.get("dl");
385        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
386
387        // ----------------------------------------------
388        elemDesc = (ElemDesc) m_elementFlags.get("form");
389        elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
390
391        // ----------------------------------------------
392        // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
393        elemDesc = (ElemDesc) m_elementFlags.get("frame");
394        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
395        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
396        elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
397
398        // ----------------------------------------------
399        elemDesc = (ElemDesc) m_elementFlags.get("head");
400        elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
401
402        // ----------------------------------------------
403        elemDesc = (ElemDesc) m_elementFlags.get("hr");
404        elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
405
406        // ----------------------------------------------
407        // HTML 4.0, section 16.5
408        elemDesc = (ElemDesc) m_elementFlags.get("iframe");
409        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
410        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
411
412        // ----------------------------------------------
413        // Netscape 4 extension
414        elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
415        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
416
417        // ----------------------------------------------
418        elemDesc = (ElemDesc) m_elementFlags.get("img");
419        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
420        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
421        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
422        elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
423
424        // ----------------------------------------------
425        elemDesc = (ElemDesc) m_elementFlags.get("input");
426        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
427        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
428        elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
429        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
430        elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
431        elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
432
433        // ----------------------------------------------
434        elemDesc = (ElemDesc) m_elementFlags.get("ins");
435        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
436
437        // ----------------------------------------------
438        // Netscape 4 extension
439        elemDesc = (ElemDesc) m_elementFlags.get("layer");
440        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
441
442        // ----------------------------------------------
443        elemDesc = (ElemDesc) m_elementFlags.get("link");
444        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
445
446        // ----------------------------------------------
447        elemDesc = (ElemDesc) m_elementFlags.get("menu");
448        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
449
450        // ----------------------------------------------
451        elemDesc = (ElemDesc) m_elementFlags.get("object");
452        elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
453        elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
454        elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
455        elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
456        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
457        elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
458
459        // ----------------------------------------------
460        elemDesc = (ElemDesc) m_elementFlags.get("ol");
461        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
462
463        // ----------------------------------------------
464        elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
465        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
466
467        // ----------------------------------------------
468        elemDesc = (ElemDesc) m_elementFlags.get("option");
469        elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
470        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
471
472        // ----------------------------------------------
473        elemDesc = (ElemDesc) m_elementFlags.get("q");
474        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
475
476        // ----------------------------------------------
477        elemDesc = (ElemDesc) m_elementFlags.get("script");
478        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
479        elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
480        elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
481
482        // ----------------------------------------------
483        elemDesc = (ElemDesc) m_elementFlags.get("select");
484        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
485        elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
486
487        // ----------------------------------------------
488        elemDesc = (ElemDesc) m_elementFlags.get("table");
489        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
490
491        // ----------------------------------------------
492        elemDesc = (ElemDesc) m_elementFlags.get("td");
493        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
494
495        // ----------------------------------------------
496        elemDesc = (ElemDesc) m_elementFlags.get("textarea");
497        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
498        elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
499
500        // ----------------------------------------------
501        elemDesc = (ElemDesc) m_elementFlags.get("th");
502        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
503
504        // ----------------------------------------------
505        // The nowrap attribute of a tr element is both
506        // a Netscape and Internet-Explorer extension
507        elemDesc = (ElemDesc) m_elementFlags.get("tr");
508        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
509
510        // ----------------------------------------------
511        elemDesc = (ElemDesc) m_elementFlags.get("ul");
512        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
513    }
514
    /**
     * Dummy element for elements not found.
     * Returned by getElemDesc() and getElemDesc2() when the trie has no entry
     * for the requested name; it carries only the BLOCK flag.
     */
    static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);

    /**
     * True if URLs should be specially escaped with the %xx form.
     * Defaults to true; replaced by setSpecialEscapeURLs() and by
     * setOutputFormat() (property S_USE_URL_ESCAPING).
     */
    private boolean m_specialEscapeURLs = true;

    /**
     * True if the META tag should be omitted.
     * Defaults to false; replaced by setOmitMetaTag() and by
     * setOutputFormat() (property S_OMIT_META_TAG).
     */
    private boolean m_omitMetaTag = false;
525
526    /**
527     * Tells if the formatter should use special URL escaping.
528     *
529     * @param bool True if URLs should be specially escaped with the %xx form.
530     */
531    public void setSpecialEscapeURLs(boolean bool)
532    {
533        m_specialEscapeURLs = bool;
534    }
535
536    /**
537     * Tells if the formatter should omit the META tag.
538     *
539     * @param bool True if the META tag should be omitted.
540     */
541    public void setOmitMetaTag(boolean bool)
542    {
543        m_omitMetaTag = bool;
544    }
545
546    /**
547     * Specifies an output format for this serializer. It the
548     * serializer has already been associated with an output format,
549     * it will switch to the new format. This method should not be
550     * called while the serializer is in the process of serializing
551     * a document.
552     *
553     * This method can be called multiple times before starting
554     * the serialization of a particular result-tree. In principle
555     * all serialization parameters can be changed, with the exception
556     * of method="html" (it must be method="html" otherwise we
557     * shouldn't even have a ToHTMLStream object here!)
558     *
559     * @param format The output format or serialzation parameters
560     * to use.
561     */
562    public void setOutputFormat(Properties format)
563    {
564
565        m_specialEscapeURLs =
566            OutputPropertyUtils.getBooleanProperty(
567                OutputPropertiesFactory.S_USE_URL_ESCAPING,
568                format);
569
570        m_omitMetaTag =
571            OutputPropertyUtils.getBooleanProperty(
572                OutputPropertiesFactory.S_OMIT_META_TAG,
573                format);
574
575        super.setOutputFormat(format);
576    }
577
578    /**
579     * Tells if the formatter should use special URL escaping.
580     *
581     * @return True if URLs should be specially escaped with the %xx form.
582     */
583    private final boolean getSpecialEscapeURLs()
584    {
585        return m_specialEscapeURLs;
586    }
587
588    /**
589     * Tells if the formatter should omit the META tag.
590     *
591     * @return True if the META tag should be omitted.
592     */
593    private final boolean getOmitMetaTag()
594    {
595        return m_omitMetaTag;
596    }
597
598    /**
599     * Get a description of the given element.
600     *
601     * @param name non-null name of element, case insensitive.
602     *
603     * @return non-null reference to ElemDesc, which may be m_dummy if no
604     *         element description matches the given name.
605     */
606    public static final ElemDesc getElemDesc(String name)
607    {
608        /* this method used to return m_dummy  when name was null
609         * but now it doesn't check and and requires non-null name.
610         */
611        Object obj = m_elementFlags.get(name);
612        if (null != obj)
613            return (ElemDesc)obj;
614        return m_dummy;
615    }
616
    /**
     * A Trie that is just a copy of the "static" one (m_elementFlags),
     * created once per serializer instance.
     * We need this one to be able to use the faster, but not thread-safe
     * method Trie.get2(name); getElemDesc2() is the method that uses it.
     */
    private Trie m_htmlInfo = new Trie(m_elementFlags);
623    /**
624     * Calls to this method could be replaced with calls to
625     * getElemDesc(name), but this one should be faster.
626     */
627    private ElemDesc getElemDesc2(String name)
628    {
629        Object obj = m_htmlInfo.get2(name);
630        if (null != obj)
631            return (ElemDesc)obj;
632        return m_dummy;
633    }
634
635    /**
636     * Default constructor.
637     */
638    public ToHTMLStream()
639    {
640
641        super();
642        m_charInfo = m_htmlcharInfo;
643        // initialize namespaces
644        m_prefixMap = new NamespaceMappings();
645
646    }
647
648    /** The name of the current element. */
649//    private String m_currentElementName = null;
650
651    /**
652     * Receive notification of the beginning of a document.
653     *
654     * @throws org.xml.sax.SAXException Any SAX exception, possibly
655     *            wrapping another exception.
656     *
657     * @throws org.xml.sax.SAXException
658     */
659    protected void startDocumentInternal() throws org.xml.sax.SAXException
660    {
661        super.startDocumentInternal();
662
663        m_needToCallStartDocument = false;
664        m_needToOutputDocTypeDecl = true;
665        m_startNewLine = false;
666        setOmitXMLDeclaration(true);
667
668        if (true == m_needToOutputDocTypeDecl)
669        {
670            String doctypeSystem = getDoctypeSystem();
671            String doctypePublic = getDoctypePublic();
672            if ((null != doctypeSystem) || (null != doctypePublic))
673            {
674                final java.io.Writer writer = m_writer;
675                try
676                {
677                writer.write("<!DOCTYPE html");
678
679                if (null != doctypePublic)
680                {
681                    writer.write(" PUBLIC \"");
682                    writer.write(doctypePublic);
683                    writer.write('"');
684                }
685
686                if (null != doctypeSystem)
687                {
688                    if (null == doctypePublic)
689                        writer.write(" SYSTEM \"");
690                    else
691                        writer.write(" \"");
692
693                    writer.write(doctypeSystem);
694                    writer.write('"');
695                }
696
697                writer.write('>');
698                outputLineSep();
699                }
700                catch(IOException e)
701                {
702                    throw new SAXException(e);
703                }
704            }
705        }
706
707        m_needToOutputDocTypeDecl = false;
708    }
709
710    /**
711     * Receive notification of the end of a document.
712     *
713     * @throws org.xml.sax.SAXException Any SAX exception, possibly
714     *            wrapping another exception.
715     *
716     * @throws org.xml.sax.SAXException
717     */
718    public final void endDocument() throws org.xml.sax.SAXException
719    {
720        if (m_doIndent) {
721            flushCharactersBuffer();
722        }
723        flushPending();
724        if (m_doIndent && !m_isprevtext)
725        {
726            try
727            {
728            outputLineSep();
729            }
730            catch(IOException e)
731            {
732                throw new SAXException(e);
733            }
734        }
735
736        flushWriter();
737        if (m_tracer != null)
738            super.fireEndDoc();
739    }
740
741    /**
742     * If the previous is an inline element, won't insert a new line before the
743     * text.
744     *
745     */
746    protected boolean shouldIndentForText() {
747        return super.shouldIndentForText() && m_isprevblock;
748    }
749
750    /**
751     * Only check m_doIndent, disregard m_ispreserveSpace.
752     *
753     * @return True if the content should be formatted.
754     */
755    protected boolean shouldFormatOutput() {
756        return m_doIndent;
757    }
758
    /**
     * Receive notification of the beginning of an element.
     *
     * <p>Pending output (an open start tag, an open CDATA section or a
     * deferred start-of-document) is dealt with first. An element with a
     * non-empty namespace URI is handed to the superclass and treated like
     * XML. Otherwise the element is looked up as an HTML element,
     * {@code <name} is written, its attributes are saved for later
     * processing and a new element context is pushed. For the element
     * flagged {@code ElemDesc.HEADELEM} the start tag is closed immediately
     * and, unless {@code m_omitMetaTag} is set, a Content-Type META tag is
     * written right after it.</p>
     *
     * @param namespaceURI
     *            The namespace URI of the element; when non-null and
     *            non-empty the element is delegated to the superclass.
     * @param localName
     *            The local name of the element.
     * @param name
     *            The element type name.
     * @param atts
     *            The attributes attached to the element, if any.
     * @throws org.xml.sax.SAXException
     *             Any SAX exception, possibly wrapping another exception;
     *             an IOException raised while writing is wrapped in one.
     * @see #endElement
     * @see org.xml.sax.Attributes
     */
    public void startElement(
        String namespaceURI,
        String localName,
        String name,
        Attributes atts)
        throws SAXException
    {
        if (m_doIndent) {
            // Count this element as a child of the current element and flush
            // buffered characters before anything else is written.
            // will add extra one if having namespace but no matter
            // NOTE(review): presumably super.startElement() increments the
            // counter again on the namespaced path below - confirm in ToStream.
            m_childNodeNum++;
            flushCharactersBuffer();
        }
        ElemContext elemContext = m_elemContext;

        // clean up any pending things first; only one of the three pending
        // states is handled per call because the branches are chained
        if (elemContext.m_startTagOpen)
        {
            closeStartTag();
            elemContext.m_startTagOpen = false;
        }
        else if (m_cdataTagOpen)
        {
            closeCDATA();
            m_cdataTagOpen = false;
        }
        else if (m_needToCallStartDocument)
        {
            startDocumentInternal();
            m_needToCallStartDocument = false;
        }


        // if this element has a namespace then treat it like XML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.startElement(namespaceURI, localName, name, atts);

            return;
        }

        try
        {
            // getElemDesc2(name) is faster than getElemDesc(name)
            ElemDesc elemDesc = getElemDesc2(name);
            int elemFlags = elemDesc.getFlags();

            // deal with indentation issues first
            if (m_doIndent)
            {
                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                // Indent only when there is an enclosing element (its name is
                // set) and either this element is a block element, or the
                // previous node was a block element and was not text.
                if ((elemContext.m_elementName != null)
                        // If this element is a block element,
                        // or if this is not a block element, then if the
                        // previous is neither a text nor an inline
                        && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
                {
                    m_startNewLine = true;

                    indent();
                }
                // remembered for the indentation decision of the next node
                m_isprevblock = isBlockElement;
            }

            // save any attributes for later processing
            if (atts != null)
                addAttributes(atts);

            m_isprevtext = false;
            final java.io.Writer writer = m_writer;
            // Only "<name" is written here; the rest of the start tag is
            // written later (closeStartTag() or endElement()).
            writer.write('<');
            writer.write(name);

            if (m_doIndent) {
                // Save the parent's child count and start counting afresh for
                // this element; endElement() restores the saved value.
                m_childNodeNumStack.add(m_childNodeNum);
                m_childNodeNum = 0;
            }

            if (m_tracer != null)
                firePseudoAttributes();

            if ((elemFlags & ElemDesc.EMPTY) != 0)
            {
                // an optimization for elements which are expected
                // to be empty.
                // NOTE(review): uses the no-argument push(), so namespace URI
                // and local name are presumably not recorded - confirm in
                // ElemContext.
                m_elemContext = elemContext.push();
                /* XSLTC sometimes calls namespaceAfterStartElement()
                 * so we need to remember the name
                 */
                m_elemContext.m_elementName = name;
                m_elemContext.m_elementDesc = elemDesc;
                return;
            }
            else
            {
                elemContext = elemContext.push(namespaceURI,localName,name);
                m_elemContext = elemContext;
                elemContext.m_elementDesc = elemDesc;
                // elements flagged RAW get their character content written
                // through the raw path in characters()
                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;

                // set m_startNewLine for the next element
                if (m_doIndent) {
                    // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
                    // in this branch m_elemContext.m_elementName is not null
                    boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                    if (isBlockElement)
                        m_startNewLine = true;
                }
            }


            if ((elemFlags & ElemDesc.HEADELEM) != 0)
            {
                // This is the <HEAD> element, do some special processing:
                // close its start tag right away so the META tag can follow.
                closeStartTag();
                elemContext.m_startTagOpen = false;
                if (!m_omitMetaTag)
                {
                    if (m_doIndent)
                        indent();
                    writer.write(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
                    // the charset written is the MIME name that Encodings
                    // returns for the serializer's current encoding
                    String encoding = getEncoding();
                    String encode = Encodings.getMimeEncoding(encoding);
                    writer.write(encode);
                    writer.write("\">");
                }
            }
        }
        catch (IOException e)
        {
            // writer failures are reported to SAX callers as SAXException
            throw new SAXException(e);
        }
    }
907
908    /**
909     *  Receive notification of the end of an element.
910     *
911     *
912     *  @param namespaceURI
913     *  @param localName
914     *  @param name The element type name
915     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
916     *             wrapping another exception.
917     */
918    public final void endElement(
919        final String namespaceURI,
920        final String localName,
921        final String name)
922        throws org.xml.sax.SAXException
923    {
924        if (m_doIndent) {
925            flushCharactersBuffer();
926        }
927        // deal with any pending issues
928        if (m_cdataTagOpen)
929            closeCDATA();
930
931        // if the element has a namespace, treat it like XML, not HTML
932        if (null != namespaceURI && namespaceURI.length() > 0)
933        {
934            super.endElement(namespaceURI, localName, name);
935
936            return;
937        }
938
939        try
940        {
941
942            ElemContext elemContext = m_elemContext;
943            final ElemDesc elemDesc = elemContext.m_elementDesc;
944            final int elemFlags = elemDesc.getFlags();
945            final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
946
947            // deal with any indentation issues
948            if (m_doIndent)
949            {
950                final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
951                boolean shouldIndent = false;
952
953                // If this element is a block element,
954                // or if this is not a block element, then if the previous is
955                // neither a text nor an inline
956                if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
957                {
958                    m_startNewLine = true;
959                    shouldIndent = true;
960                }
961                if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
962                    indent(elemContext.m_currentElemDepth - 1);
963
964                m_isprevblock = isBlockElement;
965            }
966
967            final java.io.Writer writer = m_writer;
968            if (!elemContext.m_startTagOpen)
969            {
970                writer.write("</");
971                writer.write(name);
972                writer.write('>');
973            }
974            else
975            {
976                // the start-tag open when this method was called,
977                // so we need to process it now.
978
979                if (m_tracer != null)
980                    super.fireStartElem(name);
981
982                // the starting tag was still open when we received this endElement() call
983                // so we need to process any gathered attributes NOW, before they go away.
984                int nAttrs = m_attributes.getLength();
985                if (nAttrs > 0)
986                {
987                    processAttributes(m_writer, nAttrs);
988                    // clear attributes object for re-use with next element
989                    m_attributes.clear();
990                }
991                if (!elemEmpty)
992                {
993                    // As per Dave/Paul recommendation 12/06/2000
994                    // if (shouldIndent)
995                    // writer.write('>');
996                    //  indent(m_currentIndent);
997
998                    writer.write("></");
999                    writer.write(name);
1000                    writer.write('>');
1001                }
1002                else
1003                {
1004                    writer.write('>');
1005                }
1006            }
1007
1008            if (m_doIndent) {
1009                m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1);
1010                // clean up because the element has ended
1011                m_isprevtext = false;
1012            }
1013            // fire off the end element event
1014            if (m_tracer != null)
1015                super.fireEndElem(name);
1016
1017            // OPTIMIZE-EMPTY
1018            if (elemEmpty)
1019            {
1020                // a quick exit if the HTML element had no children.
1021                // This block of code can be removed if the corresponding block of code
1022                // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1023                m_elemContext = elemContext.m_prev;
1024                return;
1025            }
1026
1027            // some more clean because the element has ended.
1028            m_elemContext = elemContext.m_prev;
1029//            m_isRawStack.pop();
1030        }
1031        catch (IOException e)
1032        {
1033            throw new SAXException(e);
1034        }
1035    }
1036
1037    /**
1038     * Process an attribute.
1039     * @param   writer The writer to write the processed output to.
1040     * @param   name   The name of the attribute.
1041     * @param   value   The value of the attribute.
1042     * @param   elemDesc The description of the HTML element
1043     *           that has this attribute.
1044     *
1045     * @throws org.xml.sax.SAXException
1046     */
1047    protected void processAttribute(
1048        java.io.Writer writer,
1049        String name,
1050        String value,
1051        ElemDesc elemDesc)
1052        throws IOException
1053    {
1054        writer.write(' ');
1055
1056        if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1057            && elemDesc != null
1058            && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1059        {
1060            writer.write(name);
1061        }
1062        else
1063        {
1064            // %REVIEW% %OPT%
1065            // Two calls to single-char write may NOT
1066            // be more efficient than one to string-write...
1067            writer.write(name);
1068            writer.write("=\"");
1069            if (   elemDesc != null
1070                && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1071                writeAttrURI(writer, value, m_specialEscapeURLs);
1072            else
1073                writeAttrString(writer, value, this.getEncoding());
1074            writer.write('"');
1075
1076        }
1077    }
1078
1079    /**
1080     * Tell if a character is an ASCII digit.
1081     */
1082    private boolean isASCIIDigit(char c)
1083    {
1084        return (c >= '0' && c <= '9');
1085    }
1086
1087    /**
1088     * Make an integer into an HH hex value.
1089     * Does no checking on the size of the input, since this
1090     * is only meant to be used locally by writeAttrURI.
1091     *
1092     * @param i must be a value less than 255.
1093     *
1094     * @return should be a two character string.
1095     */
1096    private static String makeHHString(int i)
1097    {
1098        String s = Integer.toHexString(i).toUpperCase();
1099        if (s.length() == 1)
1100        {
1101            s = "0" + s;
1102        }
1103        return s;
1104    }
1105
1106    /**
1107    * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1108    * @param str must be 2 characters long
1109    *
1110    * @return true or false
1111    */
1112    private boolean isHHSign(String str)
1113    {
1114        boolean sign = true;
1115        try
1116        {
1117            char r = (char) Integer.parseInt(str, 16);
1118        }
1119        catch (NumberFormatException e)
1120        {
1121            sign = false;
1122        }
1123        return sign;
1124    }
1125
1126    /**
1127     * Write the specified <var>string</var> after substituting non ASCII characters,
1128     * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1129     *
1130     * @param   string      String to convert to XML format.
1131     * @param doURLEscaping True if we should try to encode as
1132     *                      per http://www.ietf.org/rfc/rfc2396.txt.
1133     *
1134     * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1135     */
1136    public void writeAttrURI(
1137        final java.io.Writer writer, String string, boolean doURLEscaping)
1138        throws IOException
1139    {
1140        // http://www.ietf.org/rfc/rfc2396.txt says:
1141        // A URI is always in an "escaped" form, since escaping or unescaping a
1142        // completed URI might change its semantics.  Normally, the only time
1143        // escape encodings can safely be made is when the URI is being created
1144        // from its component parts; each component may have its own set of
1145        // characters that are reserved, so only the mechanism responsible for
1146        // generating or interpreting that component can determine whether or
1147        // not escaping a character will change its semantics. Likewise, a URI
1148        // must be separated into its components before the escaped characters
1149        // within those components can be safely decoded.
1150        //
1151        // ...So we do our best to do limited escaping of the URL, without
1152        // causing damage.  If the URL is already properly escaped, in theory, this
1153        // function should not change the string value.
1154
1155        final int end = string.length();
1156        if (end > m_attrBuff.length)
1157        {
1158           m_attrBuff = new char[end*2 + 1];
1159        }
1160        string.getChars(0,end, m_attrBuff, 0);
1161        final char[] chars = m_attrBuff;
1162
1163        int cleanStart = 0;
1164        int cleanLength = 0;
1165
1166
1167        char ch = 0;
1168        for (int i = 0; i < end; i++)
1169        {
1170            ch = chars[i];
1171
1172            if ((ch < 32) || (ch > 126))
1173            {
1174                if (cleanLength > 0)
1175                {
1176                    writer.write(chars, cleanStart, cleanLength);
1177                    cleanLength = 0;
1178                }
1179                if (doURLEscaping)
1180                {
1181                    // Encode UTF16 to UTF8.
1182                    // Reference is Unicode, A Primer, by Tony Graham.
1183                    // Page 92.
1184
1185                    // Note that Kay doesn't escape 0x20...
1186                    //  if(ch == 0x20) // Not sure about this... -sb
1187                    //  {
1188                    //    writer.write(ch);
1189                    //  }
1190                    //  else
1191                    if (ch <= 0x7F)
1192                    {
1193                        writer.write('%');
1194                        writer.write(makeHHString(ch));
1195                    }
1196                    else if (ch <= 0x7FF)
1197                    {
1198                        // Clear low 6 bits before rotate, put high 4 bits in low byte,
1199                        // and set two high bits.
1200                        int high = (ch >> 6) | 0xC0;
1201                        int low = (ch & 0x3F) | 0x80;
1202                        // First 6 bits, + high bit
1203                        writer.write('%');
1204                        writer.write(makeHHString(high));
1205                        writer.write('%');
1206                        writer.write(makeHHString(low));
1207                    }
1208                    else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1209                    {
1210                        // I'm sure this can be done in 3 instructions, but I choose
1211                        // to try and do it exactly like it is done in the book, at least
1212                        // until we are sure this is totally clean.  I don't think performance
1213                        // is a big issue with this particular function, though I could be
1214                        // wrong.  Also, the stuff below clearly does more masking than
1215                        // it needs to do.
1216
1217                        // Clear high 6 bits.
1218                        int highSurrogate = ((int) ch) & 0x03FF;
1219
1220                        // Middle 4 bits (wwww) + 1
1221                        // "Note that the value of wwww from the high surrogate bit pattern
1222                        // is incremented to make the uuuuu bit pattern in the scalar value
1223                        // so the surrogate pair don't address the BMP."
1224                        int wwww = ((highSurrogate & 0x03C0) >> 6);
1225                        int uuuuu = wwww + 1;
1226
1227                        // next 4 bits
1228                        int zzzz = (highSurrogate & 0x003C) >> 2;
1229
1230                        // low 2 bits
1231                        int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1232
1233                        // Get low surrogate character.
1234                        ch = chars[++i];
1235
1236                        // Clear high 6 bits.
1237                        int lowSurrogate = ((int) ch) & 0x03FF;
1238
1239                        // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1240                        yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1241
1242                        // bottom 6 bits.
1243                        int xxxxxx = (lowSurrogate & 0x003F);
1244
1245                        int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1246                        int byte2 =
1247                            0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1248                        int byte3 = 0x80 | yyyyyy;
1249                        int byte4 = 0x80 | xxxxxx;
1250
1251                        writer.write('%');
1252                        writer.write(makeHHString(byte1));
1253                        writer.write('%');
1254                        writer.write(makeHHString(byte2));
1255                        writer.write('%');
1256                        writer.write(makeHHString(byte3));
1257                        writer.write('%');
1258                        writer.write(makeHHString(byte4));
1259                    }
1260                    else
1261                    {
1262                        int high = (ch >> 12) | 0xE0; // top 4 bits
1263                        int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1264                        // middle 6 bits
1265                        int low = (ch & 0x3F) | 0x80;
1266                        // First 6 bits, + high bit
1267                        writer.write('%');
1268                        writer.write(makeHHString(high));
1269                        writer.write('%');
1270                        writer.write(makeHHString(middle));
1271                        writer.write('%');
1272                        writer.write(makeHHString(low));
1273                    }
1274
1275                }
1276                else if (escapingNotNeeded(ch))
1277                {
1278                    writer.write(ch);
1279                }
1280                else
1281                {
1282                    writer.write("&#");
1283                    writer.write(Integer.toString(ch));
1284                    writer.write(';');
1285                }
1286                // In this character range we have first written out any previously accumulated
1287                // "clean" characters, then processed the current more complicated character,
1288                // which may have incremented "i".
1289                // We now we reset the next possible clean character.
1290                cleanStart = i + 1;
1291            }
1292            // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1293            // not allowing quotes in the URI proper syntax, nor in the fragment
1294            // identifier, we believe that it's OK to double escape quotes.
1295            else if (ch == '"')
1296            {
1297                // If the character is a '%' number number, try to avoid double-escaping.
1298                // There is a question if this is legal behavior.
1299
1300                // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1301                // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1302
1303                //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1304
1305                // We are no longer escaping '%'
1306
1307                if (cleanLength > 0)
1308                {
1309                    writer.write(chars, cleanStart, cleanLength);
1310                    cleanLength = 0;
1311                }
1312
1313
1314                // Mike Kay encodes this as &#34;, so he may know something I don't?
1315                if (doURLEscaping)
1316                    writer.write("%22");
1317                else
1318                    writer.write("&quot;"); // we have to escape this, I guess.
1319
1320                // We have written out any clean characters, then the escaped '%' and now we
1321                // We now we reset the next possible clean character.
1322                cleanStart = i + 1;
1323            }
1324            else if (ch == '&')
1325            {
1326                // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1327                // instead of "&" to avoid confusion with the beginning of a character
1328                // reference (entity reference open delimiter).
1329                if (cleanLength > 0)
1330                {
1331                    writer.write(chars, cleanStart, cleanLength);
1332                    cleanLength = 0;
1333                }
1334                writer.write("&amp;");
1335                cleanStart = i + 1;
1336            }
1337            else
1338            {
1339                // no processing for this character, just count how
1340                // many characters in a row that we have that need no processing
1341                cleanLength++;
1342            }
1343        }
1344
1345        // are there any clean characters at the end of the array
1346        // that we haven't processed yet?
1347        if (cleanLength > 1)
1348        {
1349            // if the whole string can be written out as-is do so
1350            // otherwise write out the clean chars at the end of the
1351            // array
1352            if (cleanStart == 0)
1353                writer.write(string);
1354            else
1355                writer.write(chars, cleanStart, cleanLength);
1356        }
1357        else if (cleanLength == 1)
1358        {
1359            // a little optimization for 1 clean character
1360            // (we could have let the previous if(...) handle them all)
1361            writer.write(ch);
1362        }
1363    }
1364
1365    /**
1366     * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1367     * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1368     *
1369     * @param   string      String to convert to XML format.
1370     * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1371     *
1372     * @throws org.xml.sax.SAXException
1373     */
1374    public void writeAttrString(
1375        final java.io.Writer writer, String string, String encoding)
1376        throws IOException
1377    {
1378        final int end = string.length();
1379        if (end > m_attrBuff.length)
1380        {
1381            m_attrBuff = new char[end * 2 + 1];
1382        }
1383        string.getChars(0, end, m_attrBuff, 0);
1384        final char[] chars = m_attrBuff;
1385
1386
1387
1388        int cleanStart = 0;
1389        int cleanLength = 0;
1390
1391        char ch = 0;
1392        for (int i = 0; i < end; i++)
1393        {
1394            ch = chars[i];
1395
1396            // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1397            // System.out.println("ch: "+(int)ch);
1398            // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1399            // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1400            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1401            {
1402                cleanLength++;
1403            }
1404            else if ('<' == ch || '>' == ch)
1405            {
1406                cleanLength++; // no escaping in this case, as specified in 15.2
1407            }
1408            else if (
1409                ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1410            {
1411                cleanLength++; // no escaping in this case, as specified in 15.2
1412            }
1413            else
1414            {
1415                if (cleanLength > 0)
1416                {
1417                    writer.write(chars,cleanStart,cleanLength);
1418                    cleanLength = 0;
1419                }
1420                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1421
1422                if (i != pos)
1423                {
1424                    i = pos - 1;
1425                }
1426                else
1427                {
1428                    if (Encodings.isHighUTF16Surrogate(ch))
1429                    {
1430
1431                            writeUTF16Surrogate(ch, chars, i, end);
1432                            i++; // two input characters processed
1433                                 // this increments by one and the for()
1434                                 // loop itself increments by another one.
1435                    }
1436
1437                    // The next is kind of a hack to keep from escaping in the case
1438                    // of Shift_JIS and the like.
1439
1440                    /*
1441                    else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1442                    && (ch != 160))
1443                    {
1444                    writer.write(ch);  // no escaping in this case
1445                    }
1446                    else
1447                    */
1448                    String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1449                    if (null != outputStringForChar)
1450                    {
1451                        writer.write(outputStringForChar);
1452                    }
1453                    else if (escapingNotNeeded(ch))
1454                    {
1455                        writer.write(ch); // no escaping in this case
1456                    }
1457                    else
1458                    {
1459                        writer.write("&#");
1460                        writer.write(Integer.toString(ch));
1461                        writer.write(';');
1462                    }
1463                }
1464                cleanStart = i + 1;
1465            }
1466        } // end of for()
1467
1468        // are there any clean characters at the end of the array
1469        // that we haven't processed yet?
1470        if (cleanLength > 1)
1471        {
1472            // if the whole string can be written out as-is do so
1473            // otherwise write out the clean chars at the end of the
1474            // array
1475            if (cleanStart == 0)
1476                writer.write(string);
1477            else
1478                writer.write(chars, cleanStart, cleanLength);
1479        }
1480        else if (cleanLength == 1)
1481        {
1482            // a little optimization for 1 clean character
1483            // (we could have let the previous if(...) handle them all)
1484            writer.write(ch);
1485        }
1486    }
1487
1488
1489
1490    /**
1491     * Receive notification of character data.
1492     *
1493     * <p>The Parser will call this method to report each chunk of
1494     * character data.  SAX parsers may return all contiguous character
1495     * data in a single chunk, or they may split it into several
1496     * chunks; however, all of the characters in any single event
1497     * must come from the same external entity, so that the Locator
1498     * provides useful information.</p>
1499     *
1500     * <p>The application must not attempt to read from the array
1501     * outside of the specified range.</p>
1502     *
1503     * <p>Note that some parsers will report whitespace using the
1504     * ignorableWhitespace() method rather than this one (validating
1505     * parsers must do so).</p>
1506     *
1507     * @param chars The characters from the XML document.
1508     * @param start The start position in the array.
1509     * @param length The number of characters to read from the array.
1510     * @throws org.xml.sax.SAXException Any SAX exception, possibly
1511     *            wrapping another exception.
1512     * @see #ignorableWhitespace
1513     * @see org.xml.sax.Locator
1514     *
1515     * @throws org.xml.sax.SAXException
1516     */
1517    public final void characters(char chars[], int start, int length)
1518        throws org.xml.sax.SAXException
1519    {
1520
1521        if (m_elemContext.m_isRaw)
1522        {
1523            try
1524            {
1525                if (m_elemContext.m_startTagOpen)
1526                {
1527                    closeStartTag();
1528                    m_elemContext.m_startTagOpen = false;
1529                }
1530
1531//              With m_ispreserve just set true it looks like shouldIndent()
1532//              will always return false, so drop any possible indentation.
1533//              if (shouldIndent())
1534//                  indent();
1535
1536                // writer.write("<![CDATA[");
1537                // writer.write(chars, start, length);
1538                writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1539                m_isprevtext = true;
1540                // writer.write("]]>");
1541
1542                // time to generate characters event
1543                if (m_tracer != null)
1544                    super.fireCharEvent(chars, start, length);
1545
1546                return;
1547            }
1548            catch (IOException ioe)
1549            {
1550                throw new org.xml.sax.SAXException(
1551                    Utils.messages.createMessage(
1552                        MsgKey.ER_OIERROR,
1553                        null),
1554                    ioe);
1555                //"IO error", ioe);
1556            }
1557        }
1558        else
1559        {
1560            super.characters(chars, start, length);
1561        }
1562    }
1563
1564    /**
1565     *  Receive notification of cdata.
1566     *
1567     *  <p>The Parser will call this method to report each chunk of
1568     *  character data.  SAX parsers may return all contiguous character
1569     *  data in a single chunk, or they may split it into several
1570     *  chunks; however, all of the characters in any single event
1571     *  must come from the same external entity, so that the Locator
1572     *  provides useful information.</p>
1573     *
1574     *  <p>The application must not attempt to read from the array
1575     *  outside of the specified range.</p>
1576     *
1577     *  <p>Note that some parsers will report whitespace using the
1578     *  ignorableWhitespace() method rather than this one (validating
1579     *  parsers must do so).</p>
1580     *
1581     *  @param ch The characters from the XML document.
1582     *  @param start The start position in the array.
1583     *  @param length The number of characters to read from the array.
1584     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1585     *             wrapping another exception.
1586     *  @see #ignorableWhitespace
1587     *  @see org.xml.sax.Locator
1588     *
1589     * @throws org.xml.sax.SAXException
1590     */
1591    public final void cdata(char ch[], int start, int length)
1592        throws org.xml.sax.SAXException
1593    {
1594        if ((null != m_elemContext.m_elementName)
1595            && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1596                || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1597        {
1598            try
1599            {
1600                if (m_elemContext.m_startTagOpen)
1601                {
1602                    closeStartTag();
1603                    m_elemContext.m_startTagOpen = false;
1604                }
1605
1606                if (shouldIndent())
1607                    indent();
1608
1609                // writer.write(ch, start, length);
1610                writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1611            }
1612            catch (IOException ioe)
1613            {
1614                throw new org.xml.sax.SAXException(
1615                    Utils.messages.createMessage(
1616                        MsgKey.ER_OIERROR,
1617                        null),
1618                    ioe);
1619                //"IO error", ioe);
1620            }
1621        }
1622        else
1623        {
1624            super.cdata(ch, start, length);
1625        }
1626    }
1627
1628    /**
1629     *  Receive notification of a processing instruction.
1630     *
1631     *  @param target The processing instruction target.
1632     *  @param data The processing instruction data, or null if
1633     *         none was supplied.
1634     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1635     *             wrapping another exception.
1636     *
1637     * @throws org.xml.sax.SAXException
1638     */
1639    public void processingInstruction(String target, String data)
1640        throws org.xml.sax.SAXException
1641    {
1642        if (m_doIndent) {
1643            m_childNodeNum++;
1644            flushCharactersBuffer();
1645        }
1646        // Process any pending starDocument and startElement first.
1647        flushPending();
1648
1649        // Use a fairly nasty hack to tell if the next node is supposed to be
1650        // unescaped text.
1651        if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1652        {
1653            startNonEscaping();
1654        }
1655        else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1656        {
1657            endNonEscaping();
1658        }
1659        else
1660        {
1661            try
1662            {
1663            if (m_elemContext.m_startTagOpen)
1664            {
1665                closeStartTag();
1666                m_elemContext.m_startTagOpen = false;
1667            }
1668            else if (m_needToCallStartDocument)
1669                startDocumentInternal();
1670
1671            if (shouldIndent())
1672                indent();
1673
1674            final java.io.Writer writer = m_writer;
1675            //writer.write("<?" + target);
1676            writer.write("<?");
1677            writer.write(target);
1678
1679            if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1680                writer.write(' ');
1681
1682            //writer.write(data + ">"); // different from XML
1683            writer.write(data); // different from XML
1684            writer.write('>'); // different from XML
1685
1686            // Always output a newline char if not inside of an
1687            // element. The whitespace is not significant in that
1688            // case.
1689            if (m_elemContext.m_currentElemDepth <= 0)
1690                outputLineSep();
1691
1692            m_startNewLine = true;
1693            }
1694            catch(IOException e)
1695            {
1696                throw new SAXException(e);
1697            }
1698        }
1699
1700        // now generate the PI event
1701        if (m_tracer != null)
1702            super.fireEscapingEvent(target, data);
1703     }
1704
1705    /**
1706     * Receive notivication of a entityReference.
1707     *
1708     * @param name non-null reference to entity name string.
1709     *
1710     * @throws org.xml.sax.SAXException
1711     */
1712    public final void entityReference(String name)
1713        throws org.xml.sax.SAXException
1714    {
1715        try
1716        {
1717
1718        final java.io.Writer writer = m_writer;
1719        writer.write('&');
1720        writer.write(name);
1721        writer.write(';');
1722
1723        } catch(IOException e)
1724        {
1725            throw new SAXException(e);
1726        }
1727    }
1728    /**
1729     * @see ExtendedContentHandler#endElement(String)
1730     */
1731    public final void endElement(String elemName) throws SAXException
1732    {
1733        endElement(null, null, elemName);
1734    }
1735
1736    /**
1737     * Process the attributes, which means to write out the currently
1738     * collected attributes to the writer. The attributes are not
1739     * cleared by this method
1740     *
1741     * @param writer the writer to write processed attributes to.
1742     * @param nAttrs the number of attributes in m_attributes
1743     * to be processed
1744     *
1745     * @throws org.xml.sax.SAXException
1746     */
1747    public void processAttributes(java.io.Writer writer, int nAttrs)
1748        throws IOException,SAXException
1749    {
1750            /*
1751             * process the collected attributes
1752             */
1753            for (int i = 0; i < nAttrs; i++)
1754            {
1755                processAttribute(
1756                    writer,
1757                    m_attributes.getQName(i),
1758                    m_attributes.getValue(i),
1759                    m_elemContext.m_elementDesc);
1760            }
1761    }
1762
1763    /**
1764     * For the enclosing elements starting tag write out out any attributes
1765     * followed by ">"
1766     *
1767     *@throws org.xml.sax.SAXException
1768     */
1769    protected void closeStartTag() throws SAXException
1770    {
1771            try
1772            {
1773
1774            // finish processing attributes, time to fire off the start element event
1775            if (m_tracer != null)
1776                super.fireStartElem(m_elemContext.m_elementName);
1777
1778            int nAttrs = m_attributes.getLength();
1779            if (nAttrs>0)
1780            {
1781                processAttributes(m_writer, nAttrs);
1782                // clear attributes object for re-use with next element
1783                m_attributes.clear();
1784            }
1785
1786            m_writer.write('>');
1787
1788            /* whether Xalan or XSLTC, we have the prefix mappings now, so
1789             * lets determine if the current element is specified in the cdata-
1790             * section-elements list.
1791             */
1792            if (m_StringOfCDATASections != null)
1793                m_elemContext.m_isCdataSection = isCdataSection();
1794
1795            }
1796            catch(IOException e)
1797            {
1798                throw new SAXException(e);
1799            }
1800    }
1801
1802        /**
1803         * This method is used when a prefix/uri namespace mapping
1804         * is indicated after the element was started with a
1805         * startElement() and before and endElement().
1806         * startPrefixMapping(prefix,uri) would be used before the
1807         * startElement() call.
1808         * @param uri the URI of the namespace
1809         * @param prefix the prefix associated with the given URI.
1810         *
1811         * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1812         */
1813        public void namespaceAfterStartElement(String prefix, String uri)
1814            throws SAXException
1815        {
1816            // hack for XSLTC with finding URI for default namespace
1817            if (m_elemContext.m_elementURI == null)
1818            {
1819                String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1820                if (prefix1 == null && EMPTYSTRING.equals(prefix))
1821                {
1822                    // the elements URI is not known yet, and it
1823                    // doesn't have a prefix, and we are currently
1824                    // setting the uri for prefix "", so we have
1825                    // the uri for the element... lets remember it
1826                    m_elemContext.m_elementURI = uri;
1827                }
1828            }
1829            startPrefixMapping(prefix,uri,false);
1830        }
1831
1832    public void startDTD(String name, String publicId, String systemId)
1833        throws SAXException
1834    {
1835        m_inDTD = true;
1836        super.startDTD(name, publicId, systemId);
1837    }
1838
1839    /**
1840     * Report the end of DTD declarations.
1841     * @throws org.xml.sax.SAXException The application may raise an exception.
1842     * @see #startDTD
1843     */
1844    public void endDTD() throws org.xml.sax.SAXException
1845    {
1846        m_inDTD = false;
1847        /* for ToHTMLStream the DOCTYPE is entirely output in the
1848         * startDocumentInternal() method, so don't do anything here
1849         */
1850    }
1851    /**
1852     * This method does nothing.
1853     */
1854    public void attributeDecl(
1855        String eName,
1856        String aName,
1857        String type,
1858        String valueDefault,
1859        String value)
1860        throws SAXException
1861    {
1862        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1863    }
1864
1865    /**
1866     * This method does nothing.
1867     */
1868    public void elementDecl(String name, String model) throws SAXException
1869    {
1870        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1871    }
1872    /**
1873     * This method does nothing.
1874     */
1875    public void internalEntityDecl(String name, String value)
1876        throws SAXException
1877    {
1878        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1879    }
1880    /**
1881     * This method does nothing.
1882     */
1883    public void externalEntityDecl(
1884        String name,
1885        String publicId,
1886        String systemId)
1887        throws SAXException
1888    {
1889        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1890    }
1891
1892    /**
1893     * This method is used to add an attribute to the currently open element.
1894     * The caller has guaranted that this attribute is unique, which means that it
1895     * not been seen before and will not be seen again.
1896     *
1897     * @param name the qualified name of the attribute
1898     * @param value the value of the attribute which can contain only
1899     * ASCII printable characters characters in the range 32 to 127 inclusive.
1900     * @param flags the bit values of this integer give optimization information.
1901     */
1902    public void addUniqueAttribute(String name, String value, int flags)
1903        throws SAXException
1904    {
1905        try
1906        {
1907            final java.io.Writer writer = m_writer;
1908            if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1909            {
1910                // "flags" has indicated that the characters
1911                // '>'  '<'   '&'  and '"' are not in the value and
1912                // m_htmlcharInfo has recorded that there are no other
1913                // entities in the range 0 to 127 so we write out the
1914                // value directly
1915                writer.write(' ');
1916                writer.write(name);
1917                writer.write("=\"");
1918                writer.write(value);
1919                writer.write('"');
1920            }
1921            else if (
1922                (flags & HTML_ATTREMPTY) > 0
1923                    && (value.length() == 0 || value.equalsIgnoreCase(name)))
1924            {
1925                writer.write(' ');
1926                writer.write(name);
1927            }
1928            else
1929            {
1930                writer.write(' ');
1931                writer.write(name);
1932                writer.write("=\"");
1933                if ((flags & HTML_ATTRURL) > 0)
1934                {
1935                    writeAttrURI(writer, value, m_specialEscapeURLs);
1936                }
1937                else
1938                {
1939                    writeAttrString(writer, value, this.getEncoding());
1940                }
1941                writer.write('"');
1942            }
1943        } catch (IOException e) {
1944            throw new SAXException(e);
1945        }
1946    }
1947
1948    public void comment(char ch[], int start, int length)
1949            throws SAXException
1950    {
1951        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1952        if (m_inDTD)
1953            return;
1954        super.comment(ch, start, length);
1955    }
1956
1957    public boolean reset()
1958    {
1959        boolean ret = super.reset();
1960        if (!ret)
1961            return false;
1962        initToHTMLStream();
1963        return true;
1964    }
1965
1966    private void initToHTMLStream()
1967    {
1968        m_isprevblock = false;
1969        m_inDTD = false;
1970        m_omitMetaTag = false;
1971        m_specialEscapeURLs = true;
1972    }
1973
1974    static class Trie
1975    {
1976        /**
1977         * A digital search trie for 7-bit ASCII text
1978         * The API is a subset of java.util.Hashtable
1979         * The key must be a 7-bit ASCII string
1980         * The value may be any Java Object
1981         * One can get an object stored in a trie from its key,
1982         * but the search is either case sensitive or case
1983         * insensitive to the characters in the key, and this
1984         * choice of sensitivity or insensitivity is made when
1985         * the Trie is created, before any objects are put in it.
1986         *
1987         * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
1988         * It exists to cut the serializers dependancy on that package.
1989         *
1990         * @xsl.usage internal
1991         */
1992
1993        /** Size of the m_nextChar array.  */
1994        public static final int ALPHA_SIZE = 128;
1995
1996        /** The root node of the tree.    */
1997        final Node m_Root;
1998
1999        /** helper buffer to convert Strings to char arrays */
2000        private char[] m_charBuffer = new char[0];
2001
2002        /** true if the search for an object is lower case only with the key */
2003        private final boolean m_lowerCaseOnly;
2004
2005        /**
2006         * Construct the trie that has a case insensitive search.
2007         */
2008        public Trie()
2009        {
2010            m_Root = new Node();
2011            m_lowerCaseOnly = false;
2012        }
2013
2014        /**
2015         * Construct the trie given the desired case sensitivity with the key.
2016         * @param lowerCaseOnly true if the search keys are to be loser case only,
2017         * not case insensitive.
2018         */
2019        public Trie(boolean lowerCaseOnly)
2020        {
2021            m_Root = new Node();
2022            m_lowerCaseOnly = lowerCaseOnly;
2023        }
2024
2025        /**
2026         * Put an object into the trie for lookup.
2027         *
2028         * @param key must be a 7-bit ASCII string
2029         * @param value any java object.
2030         *
2031         * @return The old object that matched key, or null.
2032         */
2033        public Object put(String key, Object value)
2034        {
2035
2036            final int len = key.length();
2037            if (len > m_charBuffer.length)
2038            {
2039                // make the biggest buffer ever needed in get(String)
2040                m_charBuffer = new char[len];
2041            }
2042
2043            Node node = m_Root;
2044
2045            for (int i = 0; i < len; i++)
2046            {
2047                Node nextNode =
2048                    node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2049
2050                if (nextNode != null)
2051                {
2052                    node = nextNode;
2053                }
2054                else
2055                {
2056                    for (; i < len; i++)
2057                    {
2058                        Node newNode = new Node();
2059                        if (m_lowerCaseOnly)
2060                        {
2061                            // put this value into the tree only with a lower case key
2062                            node.m_nextChar[Character.toLowerCase(
2063                                key.charAt(i))] =
2064                                newNode;
2065                        }
2066                        else
2067                        {
2068                            // put this value into the tree with a case insensitive key
2069                            node.m_nextChar[Character.toUpperCase(
2070                                key.charAt(i))] =
2071                                newNode;
2072                            node.m_nextChar[Character.toLowerCase(
2073                                key.charAt(i))] =
2074                                newNode;
2075                        }
2076                        node = newNode;
2077                    }
2078                    break;
2079                }
2080            }
2081
2082            Object ret = node.m_Value;
2083
2084            node.m_Value = value;
2085
2086            return ret;
2087        }
2088
2089        /**
2090         * Get an object that matches the key.
2091         *
2092         * @param key must be a 7-bit ASCII string
2093         *
2094         * @return The object that matches the key, or null.
2095         */
2096        public Object get(final String key)
2097        {
2098
2099            final int len = key.length();
2100
2101            /* If the name is too long, we won't find it, this also keeps us
2102             * from overflowing m_charBuffer
2103             */
2104            if (m_charBuffer.length < len)
2105                return null;
2106
2107            Node node = m_Root;
2108            switch (len) // optimize the look up based on the number of chars
2109            {
2110                // case 0 looks silly, but the generated bytecode runs
2111                // faster for lookup of elements of length 2 with this in
2112                // and a fair bit faster.  Don't know why.
2113                case 0 :
2114                    {
2115                        return null;
2116                    }
2117
2118                case 1 :
2119                    {
2120                        final char ch = key.charAt(0);
2121                        if (ch < ALPHA_SIZE)
2122                        {
2123                            node = node.m_nextChar[ch];
2124                            if (node != null)
2125                                return node.m_Value;
2126                        }
2127                        return null;
2128                    }
2129                    //                comment out case 2 because the default is faster
2130                    //                case 2 :
2131                    //                    {
2132                    //                        final char ch0 = key.charAt(0);
2133                    //                        final char ch1 = key.charAt(1);
2134                    //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2135                    //                        {
2136                    //                            node = node.m_nextChar[ch0];
2137                    //                            if (node != null)
2138                    //                            {
2139                    //
2140                    //                                if (ch1 < ALPHA_SIZE)
2141                    //                                {
2142                    //                                    node = node.m_nextChar[ch1];
2143                    //                                    if (node != null)
2144                    //                                        return node.m_Value;
2145                    //                                }
2146                    //                            }
2147                    //                        }
2148                    //                        return null;
2149                    //                   }
2150                default :
2151                    {
2152                        for (int i = 0; i < len; i++)
2153                        {
2154                            // A thread-safe way to loop over the characters
2155                            final char ch = key.charAt(i);
2156                            if (ALPHA_SIZE <= ch)
2157                            {
2158                                // the key is not 7-bit ASCII so we won't find it here
2159                                return null;
2160                            }
2161
2162                            node = node.m_nextChar[ch];
2163                            if (node == null)
2164                                return null;
2165                        }
2166
2167                        return node.m_Value;
2168                    }
2169            }
2170        }
2171
2172        /**
2173         * The node representation for the trie.
2174         * @xsl.usage internal
2175         */
2176        private class Node
2177        {
2178
2179            /**
2180             * Constructor, creates a Node[ALPHA_SIZE].
2181             */
2182            Node()
2183            {
2184                m_nextChar = new Node[ALPHA_SIZE];
2185                m_Value = null;
2186            }
2187
2188            /** The next nodes.   */
2189            final Node m_nextChar[];
2190
2191            /** The value.   */
2192            Object m_Value;
2193        }
2194        /**
2195         * Construct the trie from another Trie.
2196         * Both the existing Trie and this new one share the same table for
2197         * lookup, and it is assumed that the table is fully populated and
2198         * not changing anymore.
2199         *
2200         * @param existingTrie the Trie that this one is a copy of.
2201         */
2202        public Trie(Trie existingTrie)
2203        {
2204            // copy some fields from the existing Trie into this one.
2205            m_Root = existingTrie.m_Root;
2206            m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2207
2208            // get a buffer just big enough to hold the longest key in the table.
2209            int max = existingTrie.getLongestKeyLength();
2210            m_charBuffer = new char[max];
2211        }
2212
2213        /**
2214         * Get an object that matches the key.
2215         * This method is faster than get(), but is not thread-safe.
2216         *
2217         * @param key must be a 7-bit ASCII string
2218         *
2219         * @return The object that matches the key, or null.
2220         */
2221        public Object get2(final String key)
2222        {
2223
2224            final int len = key.length();
2225
2226            /* If the name is too long, we won't find it, this also keeps us
2227             * from overflowing m_charBuffer
2228             */
2229            if (m_charBuffer.length < len)
2230                return null;
2231
2232            Node node = m_Root;
2233            switch (len) // optimize the look up based on the number of chars
2234            {
2235                // case 0 looks silly, but the generated bytecode runs
2236                // faster for lookup of elements of length 2 with this in
2237                // and a fair bit faster.  Don't know why.
2238                case 0 :
2239                    {
2240                        return null;
2241                    }
2242
2243                case 1 :
2244                    {
2245                        final char ch = key.charAt(0);
2246                        if (ch < ALPHA_SIZE)
2247                        {
2248                            node = node.m_nextChar[ch];
2249                            if (node != null)
2250                                return node.m_Value;
2251                        }
2252                        return null;
2253                    }
2254                default :
2255                    {
2256                        /* Copy string into array. This is not thread-safe because
2257                         * it modifies the contents of m_charBuffer. If multiple
2258                         * threads were to use this Trie they all would be
2259                         * using this same array (not good). So this
2260                         * method is not thread-safe, but it is faster because
2261                         * converting to a char[] and looping over elements of
2262                         * the array is faster than a String's charAt(i).
2263                         */
2264                        key.getChars(0, len, m_charBuffer, 0);
2265
2266                        for (int i = 0; i < len; i++)
2267                        {
2268                            final char ch = m_charBuffer[i];
2269                            if (ALPHA_SIZE <= ch)
2270                            {
2271                                // the key is not 7-bit ASCII so we won't find it here
2272                                return null;
2273                            }
2274
2275                            node = node.m_nextChar[ch];
2276                            if (node == null)
2277                                return null;
2278                        }
2279
2280                        return node.m_Value;
2281                    }
2282            }
2283        }
2284
2285        /**
2286         * Get the length of the longest key used in the table.
2287         */
2288        public int getLongestKeyLength()
2289        {
2290            return m_charBuffer.length;
2291        }
2292    }
2293}
2294