1/*
2 * Copyright (c) 2015, 2017 Oracle and/or its affiliates. All rights reserved.
3 */
4/*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements.  See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21
22// Sep 14, 2000:
23//  Fixed serializer to report IO exception directly, instead at
24//  the end of document processing.
25//  Reported by Patrick Higgins <phiggins@transzap.com>
26// Aug 21, 2000:
27//  Fixed bug in startDocument not calling prepare.
28//  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
29// Aug 21, 2000:
30//  Added ability to omit DOCTYPE declaration.
31// Sep 1, 2000:
32//   If no output format is provided the serializer now defaults
33//   to ISO-8859-1 encoding. Reported by Mikael Staldal
34//   <d96-mst@d.kth.se>
35
36
37package com.sun.org.apache.xml.internal.serialize;
38
39import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
40import java.io.IOException;
41import java.io.OutputStream;
42import java.io.Writer;
43import java.util.Enumeration;
44import java.util.Locale;
45import java.util.Map;
46import org.w3c.dom.Attr;
47import org.w3c.dom.Element;
48import org.w3c.dom.NamedNodeMap;
49import org.w3c.dom.Node;
50import org.xml.sax.AttributeList;
51import org.xml.sax.Attributes;
52import org.xml.sax.SAXException;
53
54
55/**
56 * Implements an HTML/XHTML serializer supporting both DOM and SAX
57 * pretty serializing. HTML/XHTML mode is determined in the
58 * constructor.  For usage instructions see {@link Serializer}.
59 * <p>
60 * If an output stream is used, the encoding is taken from the
61 * output format (defaults to <tt>UTF-8</tt>). If a writer is
62 * used, make sure the writer uses the same encoding (if applies)
63 * as specified in the output format.
64 * <p>
65 * The serializer supports both DOM and SAX. DOM serializing is done
66 * by calling {@link #serialize} and SAX serializing is done by firing
67 * SAX events and using the serializer as a document handler.
68 * <p>
 * If an I/O exception occurs while handling SAX events, the element
 * and character handlers of this class report it immediately by
 * throwing a {@link org.xml.sax.SAXException} that wraps the I/O
 * exception; the DOM serializing methods throw the I/O exception itself.
73 * <p>
74 * For elements that are not specified as whitespace preserving,
75 * the serializer will potentially break long text lines at space
76 * boundaries, indent lines, and serialize elements on separate
77 * lines. Line terminators will be regarded as spaces, and
78 * spaces at beginning of line will be stripped.
79 * <p>
80 * XHTML is slightly different than HTML:
81 * <ul>
82 * <li>Element/attribute names are lower case and case matters
83 * <li>Attributes must specify value, even if empty string
84 * <li>Empty elements must have '/' in empty tag
85 * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
86 * </ul>
87 *
88 * @deprecated This class was deprecated in Xerces 2.6.2. It is
89 * recommended that new applications use JAXP's Transformation API
90 * for XML (TrAX) for serializing HTML. See the Xerces documentation
91 * for more information.
92 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
93 * @see Serializer
94 */
95@Deprecated
96public class HTMLSerializer
97    extends BaseMarkupSerializer
98{
99
100
    /**
     * True if serializing in XHTML format, false for plain HTML.
     * Assigned by the constructor.
     */
    private boolean _xhtml;


    /**
     * The namespace URI that marks an element as XHTML.
     */
    public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";

    // Optional user-supplied namespace URI that is recognized as XHTML
    // in addition to XHTMLNamespace; null until setXHTMLNamespace is called.
    private String fUserXHTMLNamespace = null;
111
112
113    /**
114     * Constructs a new HTML/XHTML serializer depending on the value of
115     * <tt>xhtml</tt>. The serializer cannot be used without calling
116     * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
117     *
118     * @param xhtml True if XHTML serializing
119     */
120    protected HTMLSerializer( boolean xhtml, OutputFormat format )
121    {
122        super( format );
123        _xhtml = xhtml;
124    }
125
126
    /**
     * Constructs a new HTML (non-XHTML) serializer that uses a default
     * output format built with the HTML method and the
     * <tt>ISO-8859-1</tt> encoding. The serializer cannot be used
     * without calling {@link #setOutputCharStream} or
     * {@link #setOutputByteStream} first.
     */
    public HTMLSerializer()
    {
        // NOTE(review): the third OutputFormat argument is presumably the
        // indenting flag - confirm against OutputFormat.
        this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
    }
136
137
138    /**
139     * Constructs a new serializer. The serializer cannot be used without
140     * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
141     * first.
142     */
143    public HTMLSerializer( OutputFormat format )
144    {
145        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
146    }
147
148
149
150    /**
151     * Constructs a new serializer that writes to the specified writer
152     * using the specified output format. If <tt>format</tt> is null,
153     * will use a default output format.
154     *
155     * @param writer The writer to use
156     * @param format The output format to use, null for the default
157     */
158    public HTMLSerializer( Writer writer, OutputFormat format )
159    {
160        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
161        setOutputCharStream( writer );
162    }
163
164
165    /**
166     * Constructs a new serializer that writes to the specified output
167     * stream using the specified output format. If <tt>format</tt>
168     * is null, will use a default output format.
169     *
170     * @param output The output stream to use
171     * @param format The output format to use, null for the default
172     */
173    public HTMLSerializer( OutputStream output, OutputFormat format )
174    {
175        this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
176        setOutputByteStream( output );
177    }
178
179
180    public void setOutputFormat( OutputFormat format )
181    {
182        super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
183    }
184
185    // Set  value for alternate XHTML namespace.
186    public void setXHTMLNamespace(String newNamespace) {
187        fUserXHTMLNamespace = newNamespace;
188    } // setXHTMLNamespace(String)
189
190    //-----------------------------------------//
191    // SAX content handler serializing methods //
192    //-----------------------------------------//
193
194
195    public void startElement( String namespaceURI, String localName,
196                              String rawName, Attributes attrs )
197        throws SAXException
198    {
199        int          i;
200        boolean      preserveSpace;
201        ElementState state;
202        String       name;
203        String       value;
204        String       htmlName;
205        boolean      addNSAttr = false;
206
207        try {
208            if ( _printer == null )
209                throw new IllegalStateException(
210                                    DOMMessageFormatter.formatMessage(
211                                    DOMMessageFormatter.SERIALIZER_DOMAIN,
212                    "NoWriterSupplied", null));
213
214            state = getElementState();
215            if ( isDocumentState() ) {
216                // If this is the root element handle it differently.
217                // If the first root element in the document, serialize
218                // the document's DOCTYPE. Space preserving defaults
219                // to that of the output format.
220                if ( ! _started )
221                    startDocument( (localName == null || localName.length() == 0)
222                        ? rawName : localName );
223            } else {
224                // For any other element, if first in parent, then
225                // close parent's opening tag and use the parnet's
226                // space preserving.
227                if ( state.empty )
228                    _printer.printText( '>' );
229                // Indent this element on a new line if the first
230                // content of the parent element or immediately
231                // following an element.
232                if ( _indenting && ! state.preserveSpace &&
233                     ( state.empty || state.afterElement ) )
234                    _printer.breakLine();
235            }
236            preserveSpace = state.preserveSpace;
237
238            // Do not change the current element state yet.
239            // This only happens in endElement().
240
241            // As per SAX2, the namespace URI is an empty string if the element has no
242            // namespace URI, or namespaces is turned off. The check against null protects
243            // against broken SAX implementations, so I've left it there. - mrglavas
244            boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
245
246            // SAX2: rawName (QName) could be empty string if
247            // namespace-prefixes property is false.
248            if ( rawName == null || rawName.length() == 0) {
249                rawName = localName;
250                if ( hasNamespaceURI ) {
251                    String prefix;
252                    prefix = getPrefix( namespaceURI );
253                    if ( prefix != null && prefix.length() != 0 )
254                        rawName = prefix + ":" + localName;
255                }
256                addNSAttr = true;
257            }
258            if ( !hasNamespaceURI )
259                htmlName = rawName;
260            else {
261                if ( namespaceURI.equals( XHTMLNamespace ) ||
262                        (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
263                    htmlName = localName;
264                else
265                    htmlName = null;
266            }
267
268            // XHTML: element names are lower case, DOM will be different
269            _printer.printText( '<' );
270            if ( _xhtml )
271                _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
272            else
273                _printer.printText( rawName );
274            _printer.indent();
275
276            // For each attribute serialize it's name and value as one part,
277            // separated with a space so the element can be broken on
278            // multiple lines.
279            if ( attrs != null ) {
280                for ( i = 0 ; i < attrs.getLength() ; ++i ) {
281                    _printer.printSpace();
282                    name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
283                    value = attrs.getValue( i );
284                    if ( _xhtml || hasNamespaceURI ) {
285                        // XHTML: print empty string for null values.
286                        if ( value == null ) {
287                            _printer.printText( name );
288                            _printer.printText( "=\"\"" );
289                        } else {
290                            _printer.printText( name );
291                            _printer.printText( "=\"" );
292                            printEscaped( value );
293                            _printer.printText( '"' );
294                        }
295                    } else {
296                        // HTML: Empty values print as attribute name, no value.
297                        // HTML: URI attributes will print unescaped
298                        if ( value == null ) {
299                            value = "";
300                        }
301                        if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
302                            _printer.printText( name );
303                        else if ( HTMLdtd.isURI( rawName, name ) ) {
304                            _printer.printText( name );
305                            _printer.printText( "=\"" );
306                            _printer.printText( escapeURI( value ) );
307                            _printer.printText( '"' );
308                        } else if ( HTMLdtd.isBoolean( rawName, name ) )
309                            _printer.printText( name );
310                        else {
311                            _printer.printText( name );
312                            _printer.printText( "=\"" );
313                            printEscaped( value );
314                            _printer.printText( '"' );
315                        }
316                    }
317                }
318            }
319            if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
320                preserveSpace = true;
321
322            if ( addNSAttr ) {
323                for (Map.Entry<String, String> entry : _prefixes.entrySet()) {
324                    _printer.printSpace();
325                    value = entry.getKey(); //The prefixes map uses the URI value as key.
326                    name = entry.getValue(); //and prefix name as value
327                    if ( name.length() == 0 ) {
328                        _printer.printText( "xmlns=\"" );
329                        printEscaped( value );
330                        _printer.printText( '"' );
331                    } else {
332                        _printer.printText( "xmlns:" );
333                        _printer.printText( name );
334                        _printer.printText( "=\"" );
335                        printEscaped( value );
336                        _printer.printText( '"' );
337                    }
338                }
339            }
340
341            // Now it's time to enter a new element state
342            // with the tag name and space preserving.
343            // We still do not change the curent element state.
344            state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
345
346            // Prevents line breaks inside A/TD
347
348            if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
349                                       htmlName.equalsIgnoreCase( "TD" ) ) ) {
350                state.empty = false;
351                _printer.printText( '>' );
352            }
353
354            // Handle SCRIPT and STYLE specifically by changing the
355            // state of the current element to CDATA (XHTML) or
356            // unescaped (HTML).
357            if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
358                                       rawName.equalsIgnoreCase( "STYLE" ) ) ) {
359                if ( _xhtml ) {
360                    // XHTML: Print contents as CDATA section
361                    state.doCData = true;
362                } else {
363                    // HTML: Print contents unescaped
364                    state.unescaped = true;
365                }
366            }
367        } catch ( IOException except ) {
368            throw new SAXException( except );
369        }
370    }
371
372
373    public void endElement( String namespaceURI, String localName,
374                            String rawName )
375        throws SAXException
376    {
377        try {
378            endElementIO( namespaceURI, localName, rawName );
379        } catch ( IOException except ) {
380            throw new SAXException( except );
381        }
382    }
383
384
385    public void endElementIO( String namespaceURI, String localName,
386                              String rawName )
387        throws IOException
388    {
389        ElementState state;
390        String       htmlName;
391
392        // Works much like content() with additions for closing
393        // an element. Note the different checks for the closed
394        // element's state and the parent element's state.
395        _printer.unindent();
396        state = getElementState();
397
398        if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
399            htmlName = state.rawName;
400        else {
401            if ( state.namespaceURI.equals( XHTMLNamespace ) ||
402                        (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
403                htmlName = state.localName;
404            else
405                htmlName = null;
406        }
407
408        if ( _xhtml) {
409            if ( state.empty ) {
410                _printer.printText( " />" );
411            } else {
412                // Must leave CData section first
413                if ( state.inCData )
414                    _printer.printText( "]]>" );
415                // XHTML: element names are lower case, DOM will be different
416                _printer.printText( "</" );
417                _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
418                _printer.printText( '>' );
419            }
420        } else {
421            if ( state.empty )
422                _printer.printText( '>' );
423            // This element is not empty and that last content was
424            // another element, so print a line break before that
425            // last element and this element's closing tag.
426            // [keith] Provided this is not an anchor.
427            // HTML: some elements do not print closing tag (e.g. LI)
428            if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
429                if ( _indenting && ! state.preserveSpace && state.afterElement )
430                    _printer.breakLine();
431                // Must leave CData section first (Illegal in HTML, but still)
432                if ( state.inCData )
433                    _printer.printText( "]]>" );
434                _printer.printText( "</" );
435                _printer.printText( state.rawName );
436                _printer.printText( '>' );
437            }
438        }
439        // Leave the element state and update that of the parent
440        // (if we're not root) to not empty and after element.
441        state = leaveElementState();
442        // Temporary hack to prevent line breaks inside A/TD
443        if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
444                                   ! htmlName.equalsIgnoreCase( "TD" ) ) )
445
446            state.afterElement = true;
447        state.empty = false;
448        if ( isDocumentState() )
449            _printer.flush();
450    }
451
452
453    //------------------------------------------//
454    // SAX document handler serializing methods //
455    //------------------------------------------//
456
457
458    public void characters( char[] chars, int start, int length )
459        throws SAXException
460    {
461        ElementState state;
462
463        try {
464            // HTML: no CDATA section
465            state = content();
466            state.doCData = false;
467            super.characters( chars, start, length );
468        } catch ( IOException except ) {
469            throw new SAXException( except );
470        }
471    }
472
473
474    public void startElement( String tagName, AttributeList attrs )
475        throws SAXException
476    {
477        int          i;
478        boolean      preserveSpace;
479        ElementState state;
480        String       name;
481        String       value;
482
483        try {
484            if ( _printer == null )
485                throw new IllegalStateException(
486                                    DOMMessageFormatter.formatMessage(
487                                    DOMMessageFormatter.SERIALIZER_DOMAIN,
488                    "NoWriterSupplied", null));
489
490
491            state = getElementState();
492            if ( isDocumentState() ) {
493                // If this is the root element handle it differently.
494                // If the first root element in the document, serialize
495                // the document's DOCTYPE. Space preserving defaults
496                // to that of the output format.
497                if ( ! _started )
498                    startDocument( tagName );
499            } else {
500                // For any other element, if first in parent, then
501                // close parent's opening tag and use the parnet's
502                // space preserving.
503                if ( state.empty )
504                    _printer.printText( '>' );
505                // Indent this element on a new line if the first
506                // content of the parent element or immediately
507                // following an element.
508                if ( _indenting && ! state.preserveSpace &&
509                     ( state.empty || state.afterElement ) )
510                    _printer.breakLine();
511            }
512            preserveSpace = state.preserveSpace;
513
514            // Do not change the current element state yet.
515            // This only happens in endElement().
516
517            // XHTML: element names are lower case, DOM will be different
518            _printer.printText( '<' );
519            if ( _xhtml )
520                _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
521            else
522                _printer.printText( tagName );
523            _printer.indent();
524
525            // For each attribute serialize it's name and value as one part,
526            // separated with a space so the element can be broken on
527            // multiple lines.
528            if ( attrs != null ) {
529                for ( i = 0 ; i < attrs.getLength() ; ++i ) {
530                    _printer.printSpace();
531                    name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
532                    value = attrs.getValue( i );
533                    if ( _xhtml ) {
534                        // XHTML: print empty string for null values.
535                        if ( value == null ) {
536                            _printer.printText( name );
537                            _printer.printText( "=\"\"" );
538                        } else {
539                            _printer.printText( name );
540                            _printer.printText( "=\"" );
541                            printEscaped( value );
542                            _printer.printText( '"' );
543                        }
544                    } else {
545                        // HTML: Empty values print as attribute name, no value.
546                        // HTML: URI attributes will print unescaped
547                        if ( value == null ) {
548                            value = "";
549                        }
550                        if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
551                            _printer.printText( name );
552                        else if ( HTMLdtd.isURI( tagName, name ) ) {
553                            _printer.printText( name );
554                            _printer.printText( "=\"" );
555                            _printer.printText( escapeURI( value ) );
556                            _printer.printText( '"' );
557                        } else if ( HTMLdtd.isBoolean( tagName, name ) )
558                            _printer.printText( name );
559                        else {
560                            _printer.printText( name );
561                            _printer.printText( "=\"" );
562                            printEscaped( value );
563                            _printer.printText( '"' );
564                        }
565                    }
566                }
567            }
568            if ( HTMLdtd.isPreserveSpace( tagName ) )
569                preserveSpace = true;
570
571            // Now it's time to enter a new element state
572            // with the tag name and space preserving.
573            // We still do not change the curent element state.
574            state = enterElementState( null, null, tagName, preserveSpace );
575
576            // Prevents line breaks inside A/TD
577            if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
578                state.empty = false;
579                _printer.printText( '>' );
580            }
581
582            // Handle SCRIPT and STYLE specifically by changing the
583            // state of the current element to CDATA (XHTML) or
584            // unescaped (HTML).
585            if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
586                 tagName.equalsIgnoreCase( "STYLE" ) ) {
587                if ( _xhtml ) {
588                    // XHTML: Print contents as CDATA section
589                    state.doCData = true;
590                } else {
591                    // HTML: Print contents unescaped
592                    state.unescaped = true;
593                }
594            }
595        } catch ( IOException except ) {
596            throw new SAXException( except );
597        }
598    }
599
600
    /**
     * SAX1 document handler callback for the end of an element.
     * Delegates to the three-argument <tt>endElement</tt>, passing null
     * for the namespace URI and local name.
     *
     * @param tagName The element's tag name
     * @throws SAXException Propagated from the three-argument
     *   <tt>endElement</tt>
     */
    public void endElement( String tagName )
        throws SAXException
    {
        endElement( null, null, tagName );
    }
606
607
    //----------------------------------//
    // Generic node serializing methods //
    //----------------------------------//
611
612
613    /**
614     * Called to serialize the document's DOCTYPE by the root element.
615     * The document type declaration must name the root element,
616     * but the root element is only known when that element is serialized,
617     * and not at the start of the document.
618     * <p>
619     * This method will check if it has not been called before ({@link #_started}),
620     * will serialize the document type declaration, and will serialize all
621     * pre-root comments and PIs that were accumulated in the document
622     * (see {@link #serializePreRoot}). Pre-root will be serialized even if
623     * this is not the first root element of the document.
624     */
625    protected void startDocument( String rootTagName )
626        throws IOException
627    {
628        StringBuffer buffer;
629
630        // Not supported in HTML/XHTML, but we still have to switch
631        // out of DTD mode.
632        _printer.leaveDTD();
633        if ( ! _started ) {
634            // If the public and system identifiers were not specified
635            // in the output format, use the appropriate ones for HTML
636            // or XHTML.
637            if ( _docTypePublicId == null && _docTypeSystemId == null ) {
638                if ( _xhtml ) {
639                    _docTypePublicId = HTMLdtd.XHTMLPublicId;
640                    _docTypeSystemId = HTMLdtd.XHTMLSystemId;
641                } else {
642                    _docTypePublicId = HTMLdtd.HTMLPublicId;
643                    _docTypeSystemId = HTMLdtd.HTMLSystemId;
644                }
645            }
646
647            if ( ! _format.getOmitDocumentType() ) {
648                // XHTML: If public identifier and system identifier
649                //  specified, print them, else print just system identifier
650                // HTML: If public identifier specified, print it with
651                //  system identifier, if specified.
652                // XHTML requires that all element names are lower case, so the
653                // root on the DOCTYPE must be 'html'. - mrglavas
654                if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null )  ) {
655                    if (_xhtml) {
656                        _printer.printText( "<!DOCTYPE html PUBLIC " );
657                    }
658                    else {
659                        _printer.printText( "<!DOCTYPE HTML PUBLIC " );
660                    }
661                    printDoctypeURL( _docTypePublicId );
662                    if ( _docTypeSystemId != null ) {
663                        if ( _indenting ) {
664                            _printer.breakLine();
665                            _printer.printText( "                      " );
666                        } else
667                        _printer.printText( ' ' );
668                        printDoctypeURL( _docTypeSystemId );
669                    }
670                    _printer.printText( '>' );
671                    _printer.breakLine();
672                } else if ( _docTypeSystemId != null ) {
673                    if (_xhtml) {
674                        _printer.printText( "<!DOCTYPE html SYSTEM " );
675                    }
676                    else {
677                        _printer.printText( "<!DOCTYPE HTML SYSTEM " );
678                    }
679                    printDoctypeURL( _docTypeSystemId );
680                    _printer.printText( '>' );
681                    _printer.breakLine();
682                }
683            }
684        }
685
686        _started = true;
687        // Always serialize these, even if not te first root element.
688        serializePreRoot();
689    }
690
691
692    /**
693     * Called to serialize a DOM element. Equivalent to calling {@link
694     * #startElement}, {@link #endElement} and serializing everything
695     * inbetween, but better optimized.
696     */
697    protected void serializeElement( Element elem )
698        throws IOException
699    {
700        Attr         attr;
701        NamedNodeMap attrMap;
702        int          i;
703        Node         child;
704        ElementState state;
705        boolean      preserveSpace;
706        String       name;
707        String       value;
708        String       tagName;
709
710        tagName = elem.getTagName();
711        state = getElementState();
712        if ( isDocumentState() ) {
713            // If this is the root element handle it differently.
714            // If the first root element in the document, serialize
715            // the document's DOCTYPE. Space preserving defaults
716            // to that of the output format.
717            if ( ! _started )
718                startDocument( tagName );
719        } else {
720            // For any other element, if first in parent, then
721            // close parent's opening tag and use the parnet's
722            // space preserving.
723            if ( state.empty )
724                _printer.printText( '>' );
725            // Indent this element on a new line if the first
726            // content of the parent element or immediately
727            // following an element.
728            if ( _indenting && ! state.preserveSpace &&
729                 ( state.empty || state.afterElement ) )
730                _printer.breakLine();
731        }
732        preserveSpace = state.preserveSpace;
733
734        // Do not change the current element state yet.
735        // This only happens in endElement().
736
737        // XHTML: element names are lower case, DOM will be different
738        _printer.printText( '<' );
739        if ( _xhtml )
740            _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
741        else
742            _printer.printText( tagName );
743        _printer.indent();
744
745        // Lookup the element's attribute, but only print specified
746        // attributes. (Unspecified attributes are derived from the DTD.
747        // For each attribute print it's name and value as one part,
748        // separated with a space so the element can be broken on
749        // multiple lines.
750        attrMap = elem.getAttributes();
751        if ( attrMap != null ) {
752            for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
753                attr = (Attr) attrMap.item( i );
754                name = attr.getName().toLowerCase(Locale.ENGLISH);
755                value = attr.getValue();
756                if ( attr.getSpecified() ) {
757                    _printer.printSpace();
758                    if ( _xhtml ) {
759                        // XHTML: print empty string for null values.
760                        if ( value == null ) {
761                            _printer.printText( name );
762                            _printer.printText( "=\"\"" );
763                        } else {
764                            _printer.printText( name );
765                            _printer.printText( "=\"" );
766                            printEscaped( value );
767                            _printer.printText( '"' );
768                        }
769                    } else {
770                        // HTML: Empty values print as attribute name, no value.
771                        // HTML: URI attributes will print unescaped
772                        if ( value == null ) {
773                            value = "";
774                        }
775                        if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
776                            _printer.printText( name );
777                        else if ( HTMLdtd.isURI( tagName, name ) ) {
778                            _printer.printText( name );
779                            _printer.printText( "=\"" );
780                            _printer.printText( escapeURI( value ) );
781                            _printer.printText( '"' );
782                        } else if ( HTMLdtd.isBoolean( tagName, name ) )
783                            _printer.printText( name );
784                        else {
785                            _printer.printText( name );
786                            _printer.printText( "=\"" );
787                            printEscaped( value );
788                            _printer.printText( '"' );
789                        }
790                    }
791                }
792            }
793        }
794        if ( HTMLdtd.isPreserveSpace( tagName ) )
795            preserveSpace = true;
796
797        // If element has children, or if element is not an empty tag,
798        // serialize an opening tag.
799        if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
800            // Enter an element state, and serialize the children
801            // one by one. Finally, end the element.
802            state = enterElementState( null, null, tagName, preserveSpace );
803
804            // Prevents line breaks inside A/TD
805            if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
806                state.empty = false;
807                _printer.printText( '>' );
808            }
809
810            // Handle SCRIPT and STYLE specifically by changing the
811            // state of the current element to CDATA (XHTML) or
812            // unescaped (HTML).
813            if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
814                 tagName.equalsIgnoreCase( "STYLE" ) ) {
815                if ( _xhtml ) {
816                    // XHTML: Print contents as CDATA section
817                    state.doCData = true;
818                } else {
819                    // HTML: Print contents unescaped
820                    state.unescaped = true;
821                }
822            }
823            child = elem.getFirstChild();
824            while ( child != null ) {
825                serializeNode( child );
826                child = child.getNextSibling();
827            }
828            endElementIO( null, null, tagName );
829        } else {
830            _printer.unindent();
831            // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
832            // HTML: Empty tags are defined as such in DTD no in document.
833            if ( _xhtml )
834                _printer.printText( " />" );
835            else
836                _printer.printText( '>' );
837            // After element but parent element is no longer empty.
838            state.afterElement = true;
839            state.empty = false;
840            if ( isDocumentState() )
841                _printer.flush();
842        }
843    }
844
845
846
847    protected void characters( String text )
848        throws IOException
849    {
850        ElementState state;
851
852        // HTML: no CDATA section
853        state = content();
854        super.characters( text );
855    }
856
857
858    protected String getEntityRef( int ch )
859    {
860        return HTMLdtd.fromChar( ch );
861    }
862
863
864    protected String escapeURI( String uri )
865    {
866        int index;
867
868        // XXX  Apparently Netscape doesn't like if we escape the URI
869        //      using %nn, so we leave it as is, just remove any quotes.
870        index = uri.indexOf( "\"" );
871        if ( index >= 0 )
872            return uri.substring( 0, index );
873        else
874            return uri;
875    }
876
877
878}
879