1/*
2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import java.util.Arrays;
29import java.util.Locale;
30import java.util.Spliterator;
31import java.util.function.IntConsumer;
32import jdk.internal.HotSpotIntrinsicCandidate;
33import jdk.internal.vm.annotation.ForceInline;
34import jdk.internal.vm.annotation.DontInline;
35
36import static java.lang.String.UTF16;
37import static java.lang.String.LATIN1;
38
39final class StringUTF16 {
40
41    public static byte[] newBytesFor(int len) {
42        if (len < 0) {
43            throw new NegativeArraySizeException();
44        }
45        if (len > MAX_LENGTH) {
46            throw new OutOfMemoryError("UTF16 String size is " + len +
47                                       ", should be less than " + MAX_LENGTH);
48        }
49        return new byte[len << 1];
50    }
51
52    @HotSpotIntrinsicCandidate
53    // intrinsic performs no bounds checks
54    static void putChar(byte[] val, int index, int c) {
55        assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
56        index <<= 1;
57        val[index++] = (byte)(c >> HI_BYTE_SHIFT);
58        val[index]   = (byte)(c >> LO_BYTE_SHIFT);
59    }
60
61    @HotSpotIntrinsicCandidate
62    // intrinsic performs no bounds checks
63    static char getChar(byte[] val, int index) {
64        assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
65        index <<= 1;
66        return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
67                      ((val[index]   & 0xff) << LO_BYTE_SHIFT));
68    }
69
70    public static int length(byte[] value) {
71        return value.length >> 1;
72    }
73
74    private static int codePointAt(byte[] value, int index, int end, boolean checked) {
75        assert index < end;
76        if (checked) {
77            checkIndex(index, value);
78        }
79        char c1 = getChar(value, index);
80        if (Character.isHighSurrogate(c1) && ++index < end) {
81            if (checked) {
82                checkIndex(index, value);
83            }
84            char c2 = getChar(value, index);
85            if (Character.isLowSurrogate(c2)) {
86               return Character.toCodePoint(c1, c2);
87            }
88        }
89        return c1;
90    }
91
92    public static int codePointAt(byte[] value, int index, int end) {
93       return codePointAt(value, index, end, false /* unchecked */);
94    }
95
96    private static int codePointBefore(byte[] value, int index, boolean checked) {
97        --index;
98        if (checked) {
99            checkIndex(index, value);
100        }
101        char c2 = getChar(value, index);
102        if (Character.isLowSurrogate(c2) && index > 0) {
103            --index;
104            if (checked) {
105                checkIndex(index, value);
106            }
107            char c1 = getChar(value, index);
108            if (Character.isHighSurrogate(c1)) {
109               return Character.toCodePoint(c1, c2);
110            }
111        }
112        return c2;
113    }
114
115    public static int codePointBefore(byte[] value, int index) {
116        return codePointBefore(value, index, false /* unchecked */);
117    }
118
119    private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) {
120        assert beginIndex <= endIndex;
121        int count = endIndex - beginIndex;
122        int i = beginIndex;
123        if (checked && i < endIndex) {
124            checkBoundsBeginEnd(i, endIndex, value);
125        }
126        for (; i < endIndex - 1; ) {
127            if (Character.isHighSurrogate(getChar(value, i++)) &&
128                Character.isLowSurrogate(getChar(value, i))) {
129                count--;
130                i++;
131            }
132        }
133        return count;
134    }
135
136    public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
137        return codePointCount(value, beginIndex, endIndex, false /* unchecked */);
138    }
139
140    public static char[] toChars(byte[] value) {
141        char[] dst = new char[value.length >> 1];
142        getChars(value, 0, dst.length, dst, 0);
143        return dst;
144    }
145
146    @HotSpotIntrinsicCandidate
147    public static byte[] toBytes(char[] value, int off, int len) {
148        byte[] val = newBytesFor(len);
149        for (int i = 0; i < len; i++) {
150            putChar(val, i, value[off]);
151            off++;
152        }
153        return val;
154    }
155
156    public static byte[] compress(char[] val, int off, int len) {
157        byte[] ret = new byte[len];
158        if (compress(val, off, ret, 0, len) == len) {
159            return ret;
160        }
161        return null;
162    }
163
164    public static byte[] compress(byte[] val, int off, int len) {
165        byte[] ret = new byte[len];
166        if (compress(val, off, ret, 0, len) == len) {
167            return ret;
168        }
169        return null;
170    }
171
172    // compressedCopy char[] -> byte[]
173    @HotSpotIntrinsicCandidate
174    public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
175        for (int i = 0; i < len; i++) {
176            char c = src[srcOff];
177            if (c > 0xFF) {
178                len = 0;
179                break;
180            }
181            dst[dstOff] = (byte)c;
182            srcOff++;
183            dstOff++;
184        }
185        return len;
186    }
187
188    // compressedCopy byte[] -> byte[]
189    @HotSpotIntrinsicCandidate
190    public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
191        // We need a range check here because 'getChar' has no checks
192        checkBoundsOffCount(srcOff, len, src);
193        for (int i = 0; i < len; i++) {
194            char c = getChar(src, srcOff);
195            if (c > 0xFF) {
196                len = 0;
197                break;
198            }
199            dst[dstOff] = (byte)c;
200            srcOff++;
201            dstOff++;
202        }
203        return len;
204    }
205
206    public static byte[] toBytes(int[] val, int index, int len) {
207        final int end = index + len;
208        // Pass 1: Compute precise size of char[]
209        int n = len;
210        for (int i = index; i < end; i++) {
211            int cp = val[i];
212            if (Character.isBmpCodePoint(cp))
213                continue;
214            else if (Character.isValidCodePoint(cp))
215                n++;
216            else throw new IllegalArgumentException(Integer.toString(cp));
217        }
218        // Pass 2: Allocate and fill in <high, low> pair
219        byte[] buf = newBytesFor(n);
220        for (int i = index, j = 0; i < end; i++, j++) {
221            int cp = val[i];
222            if (Character.isBmpCodePoint(cp)) {
223                putChar(buf, j, cp);
224            } else {
225                putChar(buf, j++, Character.highSurrogate(cp));
226                putChar(buf, j, Character.lowSurrogate(cp));
227            }
228        }
229        return buf;
230    }
231
232    public static byte[] toBytes(char c) {
233        byte[] result = new byte[2];
234        putChar(result, 0, c);
235        return result;
236    }
237
238    @HotSpotIntrinsicCandidate
239    public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
240        // We need a range check here because 'getChar' has no checks
241        if (srcBegin < srcEnd) {
242            checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
243        }
244        for (int i = srcBegin; i < srcEnd; i++) {
245            dst[dstBegin++] = getChar(value, i);
246        }
247    }
248
249    /* @see java.lang.String.getBytes(int, int, byte[], int) */
250    public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
251        srcBegin <<= 1;
252        srcEnd <<= 1;
253        for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
254            dst[dstBegin++] = value[i];
255        }
256    }
257
258    @HotSpotIntrinsicCandidate
259    public static boolean equals(byte[] value, byte[] other) {
260        if (value.length == other.length) {
261            int len = value.length >> 1;
262            for (int i = 0; i < len; i++) {
263                if (getChar(value, i) != getChar(other, i)) {
264                    return false;
265                }
266            }
267            return true;
268        }
269        return false;
270    }
271
272    @HotSpotIntrinsicCandidate
273    public static int compareTo(byte[] value, byte[] other) {
274        int len1 = length(value);
275        int len2 = length(other);
276        int lim = Math.min(len1, len2);
277        for (int k = 0; k < lim; k++) {
278            char c1 = getChar(value, k);
279            char c2 = getChar(other, k);
280            if (c1 != c2) {
281                return c1 - c2;
282            }
283        }
284        return len1 - len2;
285    }
286
287    @HotSpotIntrinsicCandidate
288    public static int compareToLatin1(byte[] value, byte[] other) {
289        return -StringLatin1.compareToUTF16(other, value);
290    }
291
292    public static int compareToCI(byte[] value, byte[] other) {
293        int len1 = length(value);
294        int len2 = length(other);
295        int lim = Math.min(len1, len2);
296        for (int k = 0; k < lim; k++) {
297            char c1 = getChar(value, k);
298            char c2 = getChar(other, k);
299            if (c1 != c2) {
300                c1 = Character.toUpperCase(c1);
301                c2 = Character.toUpperCase(c2);
302                if (c1 != c2) {
303                    c1 = Character.toLowerCase(c1);
304                    c2 = Character.toLowerCase(c2);
305                    if (c1 != c2) {
306                        return c1 - c2;
307                    }
308                }
309            }
310        }
311        return len1 - len2;
312    }
313
314    public static int compareToCI_Latin1(byte[] value, byte[] other) {
315        return -StringLatin1.compareToCI_UTF16(other, value);
316    }
317
318    public static int hashCode(byte[] value) {
319        int h = 0;
320        int length = value.length >> 1;
321        for (int i = 0; i < length; i++) {
322            h = 31 * h + getChar(value, i);
323        }
324        return h;
325    }
326
327    public static int indexOf(byte[] value, int ch, int fromIndex) {
328        int max = value.length >> 1;
329        if (fromIndex < 0) {
330            fromIndex = 0;
331        } else if (fromIndex >= max) {
332            // Note: fromIndex might be near -1>>>1.
333            return -1;
334        }
335        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
336            // handle most cases here (ch is a BMP code point or a
337            // negative value (invalid code point))
338            return indexOfChar(value, ch, fromIndex, max);
339        } else {
340            return indexOfSupplementary(value, ch, fromIndex, max);
341        }
342    }
343
344    @HotSpotIntrinsicCandidate
345    public static int indexOf(byte[] value, byte[] str) {
346        if (str.length == 0) {
347            return 0;
348        }
349        if (value.length < str.length) {
350            return -1;
351        }
352        return indexOfUnsafe(value, length(value), str, length(str), 0);
353    }
354
355    @HotSpotIntrinsicCandidate
356    public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
357        checkBoundsBeginEnd(fromIndex, valueCount, value);
358        checkBoundsBeginEnd(0, strCount, str);
359        return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
360    }
361
362
363    private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
364        assert fromIndex >= 0;
365        assert strCount > 0;
366        assert strCount <= length(str);
367        assert valueCount >= strCount;
368        char first = getChar(str, 0);
369        int max = (valueCount - strCount);
370        for (int i = fromIndex; i <= max; i++) {
371            // Look for first character.
372            if (getChar(value, i) != first) {
373                while (++i <= max && getChar(value, i) != first);
374            }
375            // Found first character, now look at the rest of value
376            if (i <= max) {
377                int j = i + 1;
378                int end = j + strCount - 1;
379                for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
380                if (j == end) {
381                    // Found whole string.
382                    return i;
383                }
384            }
385        }
386        return -1;
387    }
388
389
390    /**
391     * Handles indexOf Latin1 substring in UTF16 string.
392     */
393    @HotSpotIntrinsicCandidate
394    public static int indexOfLatin1(byte[] value, byte[] str) {
395        if (str.length == 0) {
396            return 0;
397        }
398        if (length(value) < str.length) {
399            return -1;
400        }
401        return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
402    }
403
404    @HotSpotIntrinsicCandidate
405    public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
406        checkBoundsBeginEnd(fromIndex, srcCount, src);
407        String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
408        return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
409    }
410
411    public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
412        assert fromIndex >= 0;
413        assert tgtCount > 0;
414        assert tgtCount <= tgt.length;
415        assert srcCount >= tgtCount;
416        char first = (char)(tgt[0] & 0xff);
417        int max = (srcCount - tgtCount);
418        for (int i = fromIndex; i <= max; i++) {
419            // Look for first character.
420            if (getChar(src, i) != first) {
421                while (++i <= max && getChar(src, i) != first);
422            }
423            // Found first character, now look at the rest of v2
424            if (i <= max) {
425                int j = i + 1;
426                int end = j + tgtCount - 1;
427                for (int k = 1;
428                     j < end && getChar(src, j) == (tgt[k] & 0xff);
429                     j++, k++);
430                if (j == end) {
431                    // Found whole string.
432                    return i;
433                }
434            }
435        }
436        return -1;
437    }
438
439    @HotSpotIntrinsicCandidate
440    private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
441        checkBoundsBeginEnd(fromIndex, max, value);
442        return indexOfCharUnsafe(value, ch, fromIndex, max);
443    }
444
445    private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) {
446        for (int i = fromIndex; i < max; i++) {
447            if (getChar(value, i) == ch) {
448                return i;
449            }
450        }
451        return -1;
452    }
453
454    /**
455     * Handles (rare) calls of indexOf with a supplementary character.
456     */
457    private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
458        if (Character.isValidCodePoint(ch)) {
459            final char hi = Character.highSurrogate(ch);
460            final char lo = Character.lowSurrogate(ch);
461            checkBoundsBeginEnd(fromIndex, max, value);
462            for (int i = fromIndex; i < max - 1; i++) {
463                if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
464                    return i;
465                }
466            }
467        }
468        return -1;
469    }
470
471    // srcCoder == UTF16 && tgtCoder == UTF16
472    public static int lastIndexOf(byte[] src, int srcCount,
473                                  byte[] tgt, int tgtCount, int fromIndex) {
474        assert fromIndex >= 0;
475        assert tgtCount > 0;
476        assert tgtCount <= length(tgt);
477        int min = tgtCount - 1;
478        int i = min + fromIndex;
479        int strLastIndex = tgtCount - 1;
480
481        checkIndex(strLastIndex, tgt);
482        char strLastChar = getChar(tgt, strLastIndex);
483
484        checkIndex(i, src);
485
486    startSearchForLastChar:
487        while (true) {
488            while (i >= min && getChar(src, i) != strLastChar) {
489                i--;
490            }
491            if (i < min) {
492                return -1;
493            }
494            int j = i - 1;
495            int start = j - strLastIndex;
496            int k = strLastIndex - 1;
497            while (j > start) {
498                if (getChar(src, j--) != getChar(tgt, k--)) {
499                    i--;
500                    continue startSearchForLastChar;
501                }
502            }
503            return start + 1;
504        }
505    }
506
507    public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
508        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
509            // handle most cases here (ch is a BMP code point or a
510            // negative value (invalid code point))
511            int i = Math.min(fromIndex, (value.length >> 1) - 1);
512            for (; i >= 0; i--) {
513                if (getChar(value, i) == ch) {
514                    return i;
515                }
516            }
517            return -1;
518        } else {
519            return lastIndexOfSupplementary(value, ch, fromIndex);
520        }
521    }
522
523    /**
524     * Handles (rare) calls of lastIndexOf with a supplementary character.
525     */
526    private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
527        if (Character.isValidCodePoint(ch)) {
528            char hi = Character.highSurrogate(ch);
529            char lo = Character.lowSurrogate(ch);
530            int i = Math.min(fromIndex, (value.length >> 1) - 2);
531            for (; i >= 0; i--) {
532                if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
533                    return i;
534                }
535            }
536        }
537        return -1;
538    }
539
540    public static String replace(byte[] value, char oldChar, char newChar) {
541        int len = value.length >> 1;
542        int i = -1;
543        while (++i < len) {
544            if (getChar(value, i) == oldChar) {
545                break;
546            }
547        }
548        if (i < len) {
549            byte buf[] = new byte[value.length];
550            for (int j = 0; j < i; j++) {
551                putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
552            }
553            while (i < len) {
554                char c = getChar(value, i);
555                putChar(buf, i, c == oldChar ? newChar : c);
556                i++;
557           }
558           // Check if we should try to compress to latin1
559           if (String.COMPACT_STRINGS &&
560               !StringLatin1.canEncode(oldChar) &&
561               StringLatin1.canEncode(newChar)) {
562               byte[] val = compress(buf, 0, len);
563               if (val != null) {
564                   return new String(val, LATIN1);
565               }
566           }
567           return new String(buf, UTF16);
568        }
569        return null;
570    }
571
572    public static boolean regionMatchesCI(byte[] value, int toffset,
573                                          byte[] other, int ooffset, int len) {
574        int last = toffset + len;
575        assert toffset >= 0 && ooffset >= 0;
576        assert ooffset + len <= length(other);
577        assert last <= length(value);
578        while (toffset < last) {
579            char c1 = getChar(value, toffset++);
580            char c2 = getChar(other, ooffset++);
581            if (c1 == c2) {
582                continue;
583            }
584            // try converting both characters to uppercase.
585            // If the results match, then the comparison scan should
586            // continue.
587            char u1 = Character.toUpperCase(c1);
588            char u2 = Character.toUpperCase(c2);
589            if (u1 == u2) {
590                continue;
591            }
592            // Unfortunately, conversion to uppercase does not work properly
593            // for the Georgian alphabet, which has strange rules about case
594            // conversion.  So we need to make one last check before
595            // exiting.
596            if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
597                continue;
598            }
599            return false;
600        }
601        return true;
602    }
603
604    public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
605                                                 byte[] other, int ooffset,
606                                                 int len) {
607        return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
608    }
609
610    public static String toLowerCase(String str, byte[] value, Locale locale) {
611        if (locale == null) {
612            throw new NullPointerException();
613        }
614        int first;
615        boolean hasSurr = false;
616        final int len = value.length >> 1;
617
618        // Now check if there are any characters that need to be changed, or are surrogate
619        for (first = 0 ; first < len; first++) {
620            int cp = (int)getChar(value, first);
621            if (Character.isSurrogate((char)cp)) {
622                hasSurr = true;
623                break;
624            }
625            if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
626                break;
627            }
628        }
629        if (first == len)
630            return str;
631        byte[] result = new byte[value.length];
632        System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
633                                                            // lowerCase characters.
634        String lang = locale.getLanguage();
635        if (lang == "tr" || lang == "az" || lang == "lt") {
636            return toLowerCaseEx(str, value, result, first, locale, true);
637        }
638        if (hasSurr) {
639            return toLowerCaseEx(str, value, result, first, locale, false);
640        }
641        int bits = 0;
642        for (int i = first; i < len; i++) {
643            int cp = (int)getChar(value, i);
644            if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
645                Character.isSurrogate((char)cp)) {
646                return toLowerCaseEx(str, value, result, i, locale, false);
647            }
648            if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
649                return toLowerCaseEx(str, value, result, i, locale, true);
650            }
651            cp = Character.toLowerCase(cp);
652            if (!Character.isBmpCodePoint(cp)) {
653                return toLowerCaseEx(str, value, result, i, locale, false);
654            }
655            bits |= cp;
656            putChar(result, i, cp);
657        }
658        if (bits > 0xFF) {
659            return new String(result, UTF16);
660        } else {
661            return newString(result, 0, len);
662        }
663    }
664
665    private static String toLowerCaseEx(String str, byte[] value,
666                                        byte[] result, int first, Locale locale,
667                                        boolean localeDependent) {
668        assert(result.length == value.length);
669        assert(first >= 0);
670        int resultOffset = first;
671        int length = value.length >> 1;
672        int srcCount;
673        for (int i = first; i < length; i += srcCount) {
674            int srcChar = getChar(value, i);
675            int lowerChar;
676            char[] lowerCharArray;
677            srcCount = 1;
678            if (Character.isSurrogate((char)srcChar)) {
679                srcChar = codePointAt(value, i, length);
680                srcCount = Character.charCount(srcChar);
681            }
682            if (localeDependent ||
683                srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
684                srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
685                lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
686            } else {
687                lowerChar = Character.toLowerCase(srcChar);
688            }
689            if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
690                putChar(result, resultOffset++, lowerChar);
691            } else {
692                if (lowerChar == Character.ERROR) {
693                    lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
694                } else {
695                    lowerCharArray = Character.toChars(lowerChar);
696                }
697                /* Grow result if needed */
698                int mapLen = lowerCharArray.length;
699                if (mapLen > srcCount) {
700                    byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
701                    System.arraycopy(result, 0, result2, 0, resultOffset << 1);
702                    result = result2;
703                }
704                assert resultOffset >= 0;
705                assert resultOffset + mapLen <= length(result);
706                for (int x = 0; x < mapLen; ++x) {
707                    putChar(result, resultOffset++, lowerCharArray[x]);
708                }
709            }
710        }
711        return newString(result, 0, resultOffset);
712    }
713
714    public static String toUpperCase(String str, byte[] value, Locale locale) {
715        if (locale == null) {
716            throw new NullPointerException();
717        }
718        int first;
719        boolean hasSurr = false;
720        final int len = value.length >> 1;
721
722        // Now check if there are any characters that need to be changed, or are surrogate
723        for (first = 0 ; first < len; first++) {
724            int cp = (int)getChar(value, first);
725            if (Character.isSurrogate((char)cp)) {
726                hasSurr = true;
727                break;
728            }
729            if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
730                break;
731            }
732        }
733        if (first == len) {
734            return str;
735        }
736        byte[] result = new byte[value.length];
737        System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
738                                                           // upperCase characters.
739        String lang = locale.getLanguage();
740        if (lang == "tr" || lang == "az" || lang == "lt") {
741            return toUpperCaseEx(str, value, result, first, locale, true);
742        }
743        if (hasSurr) {
744            return toUpperCaseEx(str, value, result, first, locale, false);
745        }
746        int bits = 0;
747        for (int i = first; i < len; i++) {
748            int cp = (int)getChar(value, i);
749            if (Character.isSurrogate((char)cp)) {
750                return toUpperCaseEx(str, value, result, i, locale, false);
751            }
752            cp = Character.toUpperCaseEx(cp);
753            if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
754                return toUpperCaseEx(str, value, result, i, locale, false);
755            }
756            bits |= cp;
757            putChar(result, i, cp);
758        }
759        if (bits > 0xFF) {
760            return new String(result, UTF16);
761        } else {
762            return newString(result, 0, len);
763        }
764    }
765
766    private static String toUpperCaseEx(String str, byte[] value,
767                                        byte[] result, int first,
768                                        Locale locale, boolean localeDependent)
769    {
770        assert(result.length == value.length);
771        assert(first >= 0);
772        int resultOffset = first;
773        int length = value.length >> 1;
774        int srcCount;
775        for (int i = first; i < length; i += srcCount) {
776            int srcChar = getChar(value, i);
777            int upperChar;
778            char[] upperCharArray;
779            srcCount = 1;
780            if (Character.isSurrogate((char)srcChar)) {
781                srcChar = codePointAt(value, i, length);
782                srcCount = Character.charCount(srcChar);
783            }
784            if (localeDependent) {
785                upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
786            } else {
787                upperChar = Character.toUpperCaseEx(srcChar);
788            }
789            if (Character.isBmpCodePoint(upperChar)) {
790                putChar(result, resultOffset++, upperChar);
791            } else {
792                if (upperChar == Character.ERROR) {
793                    if (localeDependent) {
794                        upperCharArray =
795                            ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
796                    } else {
797                        upperCharArray = Character.toUpperCaseCharArray(srcChar);
798                    }
799                } else {
800                    upperCharArray = Character.toChars(upperChar);
801                }
802                /* Grow result if needed */
803                int mapLen = upperCharArray.length;
804                if (mapLen > srcCount) {
805                    byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
806                    System.arraycopy(result, 0, result2, 0, resultOffset << 1);
807                    result = result2;
808                }
809                assert resultOffset >= 0;
810                assert resultOffset + mapLen <= length(result);
811                for (int x = 0; x < mapLen; ++x) {
812                    putChar(result, resultOffset++, upperCharArray[x]);
813                }
814            }
815        }
816        return newString(result, 0, resultOffset);
817    }
818
819    public static String trim(byte[] value) {
820        int length = value.length >> 1;
821        int len = length;
822        int st = 0;
823        while (st < len && getChar(value, st) <= ' ') {
824            st++;
825        }
826        while (st < len && getChar(value, len - 1) <= ' ') {
827            len--;
828        }
829        return ((st > 0) || (len < length )) ?
830            new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
831            null;
832    }
833
834    private static void putChars(byte[] val, int index, char[] str, int off, int end) {
835        while (off < end) {
836            putChar(val, index++, str[off++]);
837        }
838    }
839
840    public static String newString(byte[] val, int index, int len) {
841        if (String.COMPACT_STRINGS) {
842            byte[] buf = compress(val, index, len);
843            if (buf != null) {
844                return new String(buf, LATIN1);
845            }
846        }
847        int last = index + len;
848        return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
849    }
850
851    public static void fillNull(byte[] val, int index, int end) {
852        Arrays.fill(val, index << 1, end << 1, (byte)0);
853    }
854
855    static class CharsSpliterator implements Spliterator.OfInt {
856        private final byte[] array;
857        private int index;        // current index, modified on advance/split
858        private final int fence;  // one past last index
859        private final int cs;
860
861        CharsSpliterator(byte[] array, int acs) {
862            this(array, 0, array.length >> 1, acs);
863        }
864
865        CharsSpliterator(byte[] array, int origin, int fence, int acs) {
866            this.array = array;
867            this.index = origin;
868            this.fence = fence;
869            this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
870                      | Spliterator.SUBSIZED;
871        }
872
873        @Override
874        public OfInt trySplit() {
875            int lo = index, mid = (lo + fence) >>> 1;
876            return (lo >= mid)
877                   ? null
878                   : new CharsSpliterator(array, lo, index = mid, cs);
879        }
880
881        @Override
882        public void forEachRemaining(IntConsumer action) {
883            byte[] a; int i, hi; // hoist accesses and checks from loop
884            if (action == null)
885                throw new NullPointerException();
886            if (((a = array).length >> 1) >= (hi = fence) &&
887                (i = index) >= 0 && i < (index = hi)) {
888                do {
889                    action.accept(charAt(a, i));
890                } while (++i < hi);
891            }
892        }
893
894        @Override
895        public boolean tryAdvance(IntConsumer action) {
896            if (action == null)
897                throw new NullPointerException();
898            int i = index;
899            if (i >= 0 && i < fence) {
900                action.accept(charAt(array, i));
901                index++;
902                return true;
903            }
904            return false;
905        }
906
907        @Override
908        public long estimateSize() { return (long)(fence - index); }
909
910        @Override
911        public int characteristics() {
912            return cs;
913        }
914    }
915
916    static class CodePointsSpliterator implements Spliterator.OfInt {
917        private final byte[] array;
918        private int index;        // current index, modified on advance/split
919        private final int fence;  // one past last index
920        private final int cs;
921
922        CodePointsSpliterator(byte[] array, int acs) {
923            this(array, 0, array.length >> 1, acs);
924        }
925
926        CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
927            this.array = array;
928            this.index = origin;
929            this.fence = fence;
930            this.cs = acs | Spliterator.ORDERED;
931        }
932
933        @Override
934        public OfInt trySplit() {
935            int lo = index, mid = (lo + fence) >>> 1;
936            if (lo >= mid)
937                return null;
938
939            int midOneLess;
940            // If the mid-point intersects a surrogate pair
941            if (Character.isLowSurrogate(charAt(array, mid)) &&
942                Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
943                // If there is only one pair it cannot be split
944                if (lo >= midOneLess)
945                    return null;
946                // Shift the mid-point to align with the surrogate pair
947                return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
948            }
949            return new CodePointsSpliterator(array, lo, index = mid, cs);
950        }
951
952        @Override
953        public void forEachRemaining(IntConsumer action) {
954            byte[] a; int i, hi; // hoist accesses and checks from loop
955            if (action == null)
956                throw new NullPointerException();
957            if (((a = array).length >> 1) >= (hi = fence) &&
958                (i = index) >= 0 && i < (index = hi)) {
959                do {
960                    i = advance(a, i, hi, action);
961                } while (i < hi);
962            }
963        }
964
965        @Override
966        public boolean tryAdvance(IntConsumer action) {
967            if (action == null)
968                throw new NullPointerException();
969            if (index >= 0 && index < fence) {
970                index = advance(array, index, fence, action);
971                return true;
972            }
973            return false;
974        }
975
976        // Advance one code point from the index, i, and return the next
977        // index to advance from
978        private static int advance(byte[] a, int i, int hi, IntConsumer action) {
979            char c1 = charAt(a, i++);
980            int cp = c1;
981            if (Character.isHighSurrogate(c1) && i < hi) {
982                char c2 = charAt(a, i);
983                if (Character.isLowSurrogate(c2)) {
984                    i++;
985                    cp = Character.toCodePoint(c1, c2);
986                }
987            }
988            action.accept(cp);
989            return i;
990        }
991
992        @Override
993        public long estimateSize() { return (long)(fence - index); }
994
995        @Override
996        public int characteristics() {
997            return cs;
998        }
999    }
1000
1001    ////////////////////////////////////////////////////////////////
1002
1003    public static void putCharSB(byte[] val, int index, int c) {
1004        checkIndex(index, val);
1005        putChar(val, index, c);
1006    }
1007
1008    public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
1009        checkBoundsBeginEnd(index, index + end - off, val);
1010        putChars(val, index, ca, off, end);
1011    }
1012
1013    public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
1014        checkBoundsBeginEnd(index, index + end - off, val);
1015        for (int i = off; i < end; i++) {
1016            putChar(val, index++, s.charAt(i));
1017        }
1018    }
1019
1020    public static int codePointAtSB(byte[] val, int index, int end) {
1021        return codePointAt(val, index, end, true /* checked */);
1022    }
1023
1024    public static int codePointBeforeSB(byte[] val, int index) {
1025        return codePointBefore(val, index, true /* checked */);
1026    }
1027
1028    public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
1029        return codePointCount(val, beginIndex, endIndex, true /* checked */);
1030    }
1031
1032    public static int getChars(int i, int begin, int end, byte[] value) {
1033        checkBoundsBeginEnd(begin, end, value);
1034        int pos = getChars(i, end, value);
1035        assert begin == pos;
1036        return pos;
1037    }
1038
1039    public static int getChars(long l, int begin, int end, byte[] value) {
1040        checkBoundsBeginEnd(begin, end, value);
1041        int pos = getChars(l, end, value);
1042        assert begin == pos;
1043        return pos;
1044    }
1045
1046    public static boolean contentEquals(byte[] v1, byte[] v2, int len) {
1047        checkBoundsOffCount(0, len, v2);
1048        for (int i = 0; i < len; i++) {
1049            if ((char)(v1[i] & 0xff) != getChar(v2, i)) {
1050                return false;
1051            }
1052        }
1053        return true;
1054    }
1055
1056    public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
1057        checkOffset(len, value);
1058        for (int i = 0; i < len; i++) {
1059            if (getChar(value, i) != cs.charAt(i)) {
1060                return false;
1061            }
1062        }
1063        return true;
1064    }
1065
1066    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
1067        int end = i + 4;
1068        checkBoundsBeginEnd(i, end, value);
1069        putChar(value, i++, c1);
1070        putChar(value, i++, c2);
1071        putChar(value, i++, c3);
1072        putChar(value, i++, c4);
1073        assert(i == end);
1074        return end;
1075    }
1076
1077    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
1078        int end = i + 5;
1079        checkBoundsBeginEnd(i, end, value);
1080        putChar(value, i++, c1);
1081        putChar(value, i++, c2);
1082        putChar(value, i++, c3);
1083        putChar(value, i++, c4);
1084        putChar(value, i++, c5);
1085        assert(i == end);
1086        return end;
1087    }
1088
1089    public static char charAt(byte[] value, int index) {
1090        checkIndex(index, value);
1091        return getChar(value, index);
1092    }
1093
1094    public static void reverse(byte[] val, int count) {
1095        checkOffset(count, val);
1096        int n = count - 1;
1097        boolean hasSurrogates = false;
1098        for (int j = (n-1) >> 1; j >= 0; j--) {
1099            int k = n - j;
1100            char cj = getChar(val, j);
1101            char ck = getChar(val, k);
1102            putChar(val, j, ck);
1103            putChar(val, k, cj);
1104            if (Character.isSurrogate(cj) ||
1105                Character.isSurrogate(ck)) {
1106                hasSurrogates = true;
1107            }
1108        }
1109        if (hasSurrogates) {
1110            reverseAllValidSurrogatePairs(val, count);
1111        }
1112    }
1113
1114    /** Outlined helper method for reverse() */
1115    private static void reverseAllValidSurrogatePairs(byte[] val, int count) {
1116        for (int i = 0; i < count - 1; i++) {
1117            char c2 = getChar(val, i);
1118            if (Character.isLowSurrogate(c2)) {
1119                char c1 = getChar(val, i + 1);
1120                if (Character.isHighSurrogate(c1)) {
1121                    putChar(val, i++, c1);
1122                    putChar(val, i, c2);
1123                }
1124            }
1125        }
1126    }
1127
1128    // inflatedCopy byte[] -> byte[]
1129    public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
1130        // We need a range check here because 'putChar' has no checks
1131        checkBoundsOffCount(dstOff, len, dst);
1132        for (int i = 0; i < len; i++) {
1133            putChar(dst, dstOff++, src[srcOff++] & 0xff);
1134        }
1135    }
1136
1137    // srcCoder == UTF16 && tgtCoder == LATIN1
1138    public static int lastIndexOfLatin1(byte[] src, int srcCount,
1139                                        byte[] tgt, int tgtCount, int fromIndex) {
1140        assert fromIndex >= 0;
1141        assert tgtCount > 0;
1142        assert tgtCount <= tgt.length;
1143        int min = tgtCount - 1;
1144        int i = min + fromIndex;
1145        int strLastIndex = tgtCount - 1;
1146
1147        char strLastChar = (char)(tgt[strLastIndex] & 0xff);
1148
1149        checkIndex(i, src);
1150
1151    startSearchForLastChar:
1152        while (true) {
1153            while (i >= min && getChar(src, i) != strLastChar) {
1154                i--;
1155            }
1156            if (i < min) {
1157                return -1;
1158            }
1159            int j = i - 1;
1160            int start = j - strLastIndex;
1161            int k = strLastIndex - 1;
1162            while (j > start) {
1163                if (getChar(src, j--) != (tgt[k--] & 0xff)) {
1164                    i--;
1165                    continue startSearchForLastChar;
1166                }
1167            }
1168            return start + 1;
1169        }
1170    }
1171
1172    ////////////////////////////////////////////////////////////////
1173
1174    private static native boolean isBigEndian();
1175
1176    static final int HI_BYTE_SHIFT;
1177    static final int LO_BYTE_SHIFT;
1178    static {
1179        if (isBigEndian()) {
1180            HI_BYTE_SHIFT = 8;
1181            LO_BYTE_SHIFT = 0;
1182        } else {
1183            HI_BYTE_SHIFT = 0;
1184            LO_BYTE_SHIFT = 8;
1185        }
1186    }
1187
1188    static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1189
1190    // Used by trusted callers.  Assumes all necessary bounds checks have
1191    // been done by the caller.
1192
1193    /**
1194     * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1195     * UTF-16 coder.
1196     *
1197     * @param i     value to convert
1198     * @param index next index, after the least significant digit
1199     * @param buf   target buffer, UTF16-coded.
1200     * @return index of the most significant digit or minus sign, if present
1201     */
1202    static int getChars(int i, int index, byte[] buf) {
1203        int q, r;
1204        int charPos = index;
1205
1206        boolean negative = (i < 0);
1207        if (!negative) {
1208            i = -i;
1209        }
1210
1211        // Get 2 digits/iteration using ints
1212        while (i <= -100) {
1213            q = i / 100;
1214            r = (q * 100) - i;
1215            i = q;
1216            putChar(buf, --charPos, Integer.DigitOnes[r]);
1217            putChar(buf, --charPos, Integer.DigitTens[r]);
1218        }
1219
1220        // We know there are at most two digits left at this point.
1221        q = i / 10;
1222        r = (q * 10) - i;
1223        putChar(buf, --charPos, '0' + r);
1224
1225        // Whatever left is the remaining digit.
1226        if (q < 0) {
1227            putChar(buf, --charPos, '0' - q);
1228        }
1229
1230        if (negative) {
1231            putChar(buf, --charPos, '-');
1232        }
1233        return charPos;
1234    }
1235
1236    /**
1237     * This is a variant of {@link Long#getChars(long, int, byte[])}, but for
1238     * UTF-16 coder.
1239     *
1240     * @param i     value to convert
1241     * @param index next index, after the least significant digit
1242     * @param buf   target buffer, UTF16-coded.
1243     * @return index of the most significant digit or minus sign, if present
1244     */
1245    static int getChars(long i, int index, byte[] buf) {
1246        long q;
1247        int r;
1248        int charPos = index;
1249
1250        boolean negative = (i < 0);
1251        if (!negative) {
1252            i = -i;
1253        }
1254
1255        // Get 2 digits/iteration using longs until quotient fits into an int
1256        while (i <= Integer.MIN_VALUE) {
1257            q = i / 100;
1258            r = (int)((q * 100) - i);
1259            i = q;
1260            putChar(buf, --charPos, Integer.DigitOnes[r]);
1261            putChar(buf, --charPos, Integer.DigitTens[r]);
1262        }
1263
1264        // Get 2 digits/iteration using ints
1265        int q2;
1266        int i2 = (int)i;
1267        while (i2 <= -100) {
1268            q2 = i2 / 100;
1269            r  = (q2 * 100) - i2;
1270            i2 = q2;
1271            putChar(buf, --charPos, Integer.DigitOnes[r]);
1272            putChar(buf, --charPos, Integer.DigitTens[r]);
1273        }
1274
1275        // We know there are at most two digits left at this point.
1276        q2 = i2 / 10;
1277        r  = (q2 * 10) - i2;
1278        putChar(buf, --charPos, '0' + r);
1279
1280        // Whatever left is the remaining digit.
1281        if (q2 < 0) {
1282            putChar(buf, --charPos, '0' - q2);
1283        }
1284
1285        if (negative) {
1286            putChar(buf, --charPos, '-');
1287        }
1288        return charPos;
1289    }
1290    // End of trusted methods.
1291
1292    public static void checkIndex(int off, byte[] val) {
1293        String.checkIndex(off, length(val));
1294    }
1295
1296    public static void checkOffset(int off, byte[] val) {
1297        String.checkOffset(off, length(val));
1298    }
1299
1300    public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
1301        String.checkBoundsBeginEnd(begin, end, length(val));
1302    }
1303
1304    public static void checkBoundsOffCount(int offset, int count, byte[] val) {
1305        String.checkBoundsOffCount(offset, count, length(val));
1306    }
1307
1308}
1309