1/*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.xml.internal.bind;
27
/**
 * Processes white space normalization.
 *
 * <p>Throughout this class "white space" means exactly four characters:
 * tab (0x9), line feed (0xA), carriage return (0xD) and space (0x20).
 * No other control or Unicode space character is treated as white space.
 *
 * <p>All methods are static; none of them accepts {@code null}.
 *
 * @since 1.0
 */
public abstract class WhiteSpaceProcessor {

    // NOTE: a simpler replace(String) that appended every character to a
    // buffer one at a time was benchmarked (see test/src/ReplaceTest.java
    // in the CVS Attic) and turned out to be slower than the current code.

    /**
     * Replaces every tab, line feed and carriage return with a single space.
     *
     * @param text the text to process
     * @return the text with each tab/LF/CR replaced by a space
     * @throws NullPointerException if {@code text} is null
     */
    public static String replace(String text) {
        return replace((CharSequence) text).toString();
    }

    /**
     * Replaces every tab, line feed and carriage return with a single space.
     * The length of the text never changes.
     *
     * @param text the text to process
     * @return {@code text} itself (same instance) when it contains no tab,
     *         line feed or carriage return; otherwise a new {@link String}
     *         with those characters replaced by spaces
     * @throws NullPointerException if {@code text} is null
     * @since 2.0
     */
    public static CharSequence replace(CharSequence text) {
        // Scan backwards for the LAST character that has to be replaced,
        // hoping that there is none so that no copy is needed.
        int i = text.length() - 1;
        while (i >= 0 && !isWhiteSpaceExceptSpace(text.charAt(i))) {
            i--;
        }

        if (i < 0) {
            // nothing to replace: replace(text) == text
            return text;
        }

        // At least one character must change, so work on a mutable copy.
        StringBuilder buf = new StringBuilder(text);

        // Position i is already known to need replacing; everything to the
        // right of it has been checked, so only the left part remains.
        buf.setCharAt(i--, ' ');
        for (; i >= 0; i--) {
            if (isWhiteSpaceExceptSpace(buf.charAt(i))) {
                buf.setCharAt(i, ' ');
            }
        }

        return buf.toString();
    }

    /**
     * Strips leading and trailing white space.
     *
     * <p>This is similar to {@link String#trim()} but not identical:
     * {@code String.trim()} strips every character up to and including
     * U+0020, whereas this method strips only the four characters accepted
     * by {@link #isWhiteSpace(char)}.
     *
     * @param text the text to trim
     * @return {@code text} itself (same instance) when there is nothing to
     *         strip; otherwise a sub-sequence of {@code text}, which is empty
     *         when {@code text} consists solely of white space
     * @throws NullPointerException if {@code text} is null
     * @since 2.0
     */
    public static CharSequence trim(CharSequence text) {
        final int len = text.length();

        int start = 0;
        while (start < len && isWhiteSpace(text.charAt(start))) {
            start++;
        }

        // "end > start" (rather than ">=") keeps end from running past start;
        // for all-white-space input start == len, so the range below is empty.
        int end = len - 1;
        while (end > start && isWhiteSpace(text.charAt(end))) {
            end--;
        }

        if (start == 0 && end == len - 1) {
            return text;    // no change
        }
        return text.subSequence(start, end + 1);
    }

    /**
     * Collapses white space: strips it from both ends and replaces every
     * inner run of white space characters with one space.
     *
     * @param text the text to collapse
     * @return the collapsed text
     * @throws NullPointerException if {@code text} is null
     */
    public static String collapse(String text) {
        return collapse((CharSequence) text).toString();
    }

    /**
     * Collapses white space: strips it from both ends and replaces every
     * inner run of white space characters with one space.
     *
     * <p>This is usually the biggest processing bottleneck.
     *
     * @param text the text to collapse
     * @return {@code text} itself (same instance) when it contains no white
     *         space character at all; otherwise a newly built character
     *         sequence holding the collapsed text
     * @throws NullPointerException if {@code text} is null
     * @since 2.0
     */
    public static CharSequence collapse(CharSequence text) {
        int len = text.length();

        // Most texts contain no white space at all, so look for the first
        // white space character in the hope that we never find one.
        int s = 0;
        while (s < len && !isWhiteSpace(text.charAt(s))) {
            s++;
        }
        if (s == len) {
            // no white space anywhere: the input is returned untouched.
            return text;
        }

        // The collapsed text is never longer than the input, so this
        // capacity avoids any re-allocation.
        StringBuilder result = new StringBuilder(len);

        if (s != 0) {
            // Copy the white-space-free prefix in one go, followed by the
            // single space that stands for the run starting at index s.
            result.append(text, 0, s);
            result.append(' ');
        }
        // When s == 0 the text starts with white space, which is dropped.

        // In strip mode the previous character was white space, so any
        // further white space character is skipped.
        boolean inStripMode = true;
        for (int i = s + 1; i < len; i++) {
            char ch = text.charAt(i);
            boolean ws = isWhiteSpace(ch);
            if (inStripMode && ws) {
                continue; // skip this character
            }

            inStripMode = ws;
            result.append(ws ? ' ' : ch);
        }

        // Runs are already collapsed, so trailing white space can be at most
        // one space at the very end.
        len = result.length();
        if (len > 0 && result.charAt(len - 1) == ' ') {
            result.setLength(len - 1);
        }

        return result;
    }

    /**
     * Returns true if the specified character sequence consists solely of
     * white space characters.
     *
     * @param s the character sequence to test
     * @return true if every character is white space; this includes the
     *         empty sequence
     * @throws NullPointerException if {@code s} is null
     */
    public static boolean isWhiteSpace(CharSequence s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified char is a white space character, i.e.
     * tab (0x9), line feed (0xA), carriage return (0xD) or space (0x20).
     *
     * @param ch the character to test
     * @return true if {@code ch} is one of the four white space characters
     */
    public static boolean isWhiteSpace(char ch) {
        // Most characters are above 0x20, so rule those out with a single
        // comparison first.
        if (ch > 0x20) {
            return false;
        }

        // Otherwise four comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
    }

    /**
     * Returns true if the specified char is a white space character
     * but not 0x20, i.e. tab (0x9), line feed (0xA) or carriage return (0xD).
     *
     * @param ch the character to test
     * @return true if {@code ch} is a tab, line feed or carriage return
     */
    protected static boolean isWhiteSpaceExceptSpace(char ch) {
        // Most characters are 0x20 or above, so rule those out with a single
        // comparison first.
        if (ch >= 0x20) {
            return false;
        }

        // Otherwise three comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD;
    }
}
199