1/*
2 * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.xml.internal.stream.writers;
27
28import java.io.Writer;
29import java.io.OutputStream;
30import java.io.IOException;
31
32import com.sun.org.apache.xerces.internal.util.XMLChar;
33
34/**
35 * <p>This class is used to write a stream of chars as a stream of
36 * bytes using the UTF8 encoding. It assumes that the underlying
37 * output stream is buffered or does not need additional buffering.</p>
38 *
39 * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
40 * because it does not need to be wrapped in a
41 * <code>java.io.BufferedWriter</code>. Creating multiple instances
42 * of <code>java.io.BufferedWriter</code> has been shown to be very
43 * expensive in JAX-WS.</p>
44 *
45 * @author Santiago.PericasGeertsen@sun.com
46 */
public final class UTF8OutputStreamWriter extends Writer {

    /**
     * Underlying output stream. This class assumes that this
     * output stream does not need buffering.
     */
    OutputStream out;

    /**
     * Java represents chars that are not in the Basic Multilingual
     * Plane (BMP) as two UTF-16 code units. This int stores the first
     * (high surrogate) code unit while the second one is awaited, and
     * is 0 when no surrogate pair is in progress.
     */
    int lastUTF16CodePoint = 0;

    /**
     * Creates a writer that encodes chars as UTF-8 onto the given stream.
     *
     * @param out the stream receiving the encoded bytes
     */
    public UTF8OutputStreamWriter(OutputStream out) {
        this.out = out;
    }

    /**
     * Returns the name of the encoding produced by this writer.
     *
     * @return always <code>"UTF-8"</code>
     */
    public String getEncoding() {
        return "UTF-8";
    }

    /**
     * Encodes a single UTF-16 code unit as UTF-8. A high surrogate is
     * held back until the matching low surrogate arrives; the pair is
     * then written as one 4-byte sequence.
     *
     * @param c the char to write; as specified by {@link Writer#write(int)},
     *          only the 16 low-order bits are used
     * @throws IOException if the underlying stream fails, or if the
     *         char sequence is not well-formed UTF-16 (a low surrogate
     *         without a preceding high surrogate, or a high surrogate
     *         followed by anything other than a low surrogate)
     */
    @Override
    public void write(int c) throws IOException {
        // Writer contract: the 16 high-order bits of the argument are ignored.
        final char ch = (char) c;

        // Check if we are in the middle of a high/low surrogate pair
        if (lastUTF16CodePoint != 0) {
            final char high = (char) lastUTF16CodePoint;
            // Clear the pending state first so the writer is not left
            // stuck in "awaiting low surrogate" after an error.
            lastUTF16CodePoint = 0;

            if (!Character.isLowSurrogate(ch)) {
                throw new IOException("Attempting to write unpaired UTF-16 high surrogate 0x"
                    + Integer.toHexString(high) + " followed by 0x"
                    + Integer.toHexString(ch));
            }

            // Always in [0x10000, 0x10FFFF] for a valid surrogate pair
            final int uc = Character.toCodePoint(high, ch);

            // 4 bytes, 21 bits
            out.write(0xF0 | (uc >> 18));
            out.write(0x80 | ((uc >> 12) & 0x3F));
            out.write(0x80 | ((uc >> 6) & 0x3F));
            out.write(0x80 | (uc & 0x3F));
            return;
        }

        // Otherwise, encode char as defined in UTF-8
        if (ch < 0x80) {
            // 1 byte, 7 bits
            out.write(ch);
        }
        else if (ch < 0x800) {
            // 2 bytes, 11 bits
            out.write(0xC0 | (ch >> 6));    // first 5
            out.write(0x80 | (ch & 0x3F));  // second 6
        }
        else if (Character.isHighSurrogate(ch)) {
            // First half of a pair: nothing is written until the low
            // surrogate arrives.
            lastUTF16CodePoint = ch;
        }
        else if (Character.isLowSurrogate(ch)) {
            // A low surrogate must never start a pair; storing it as a
            // pending unit would corrupt the char that follows.
            throw new IOException("Attempting to write unpaired UTF-16 low surrogate 0x"
                + Integer.toHexString(ch));
        }
        else {
            // 3 bytes, 16 bits
            out.write(0xE0 | (ch >> 12));   // first 4
            out.write(0x80 | ((ch >> 6) & 0x3F));  // second 6
            out.write(0x80 | (ch & 0x3F));  // third 6
        }
    }

    /**
     * Encodes every char of the given array.
     *
     * @param cbuf the chars to write
     * @throws IOException if writing any char fails
     */
    @Override
    public void write(char cbuf[]) throws IOException {
        write(cbuf, 0, cbuf.length);
    }

    /**
     * Encodes <code>len</code> chars of the given array starting at
     * <code>off</code>. The range is validated before anything is
     * written, so an invalid range produces no partial output.
     *
     * @param cbuf the source array
     * @param off  index of the first char to write
     * @param len  number of chars to write
     * @throws IndexOutOfBoundsException if <code>off</code> or
     *         <code>len</code> is negative, or the range extends past
     *         the end of the array
     * @throws IOException if writing any char fails
     */
    @Override
    public void write(char cbuf[], int off, int len) throws IOException {
        // off < 0 is tested first, so cbuf.length - off cannot overflow
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                "off=" + off + ", len=" + len + ", length=" + cbuf.length);
        }
        final int end = off + len;
        for (int i = off; i < end; i++) {
            write(cbuf[i]);
        }
    }

    /**
     * Encodes every char of the given string.
     *
     * @param str the string to write
     * @throws IOException if writing any char fails
     */
    @Override
    public void write(String str) throws IOException {
        write(str, 0, str.length());
    }

    /**
     * Encodes <code>len</code> chars of the given string starting at
     * <code>off</code>. The range is validated before anything is
     * written, so an invalid range produces no partial output.
     *
     * @param str the source string
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IndexOutOfBoundsException if <code>off</code> or
     *         <code>len</code> is negative, or the range extends past
     *         the end of the string
     * @throws IOException if writing any char fails
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        final int length = str.length();
        // off < 0 is tested first, so length - off cannot overflow
        if (off < 0 || len < 0 || len > length - off) {
            throw new IndexOutOfBoundsException(
                "off=" + off + ", len=" + len + ", length=" + length);
        }
        final int end = off + len;
        for (int i = off; i < end; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Flushes the underlying output stream. A pending high surrogate,
     * if any, stays pending; it is not written by this call.
     *
     * @throws IOException if the underlying stream fails to flush
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying output stream. The stream is closed even
     * when a high surrogate is still waiting for its low surrogate, so
     * the resource is not leaked; the dangling surrogate is then
     * reported.
     *
     * @throws IOException if the underlying stream fails to close
     * @throws IllegalStateException if a high surrogate was written
     *         without its low surrogate
     */
    @Override
    public void close() throws IOException {
        final boolean awaitingLowSurrogate = lastUTF16CodePoint != 0;
        // Reset so that a repeated close() does not report the same error again.
        lastUTF16CodePoint = 0;

        out.close();

        if (awaitingLowSurrogate) {
            throw new IllegalStateException("Attempting to close a UTF8OutputStreamWriter"
                + " while awaiting for a UTF-16 code unit");
        }
    }

}
150