1/*
2**********************************************************************
3*   Copyright (C) 1998-2010, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   11/11/98    stephen     Changed per 11/9 code review.
14*   04/20/99    stephen     Overhauled per 4/16 code review.
15*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16*                           handleReplaceBetween(); other methods unchanged.
17*   06/25/01    grhoten     Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29#include "unicode/utypes.h"
30#include "unicode/rep.h"
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
34
35struct UConverter;          // unicode/ucnv.h
36class  StringThreadTest;
37
38#ifndef U_COMPARE_CODE_POINT_ORDER
39/* see also ustring.h and unorm.h */
/* Defined here only if it has not been defined already (see the #ifndef guard). */
40/**
41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
42 * Compare strings in code point order instead of code unit order.
 * It is also one of the bits accepted in the <code>options</code> argument of
 * UnicodeString::caseCompare().
43 * @stable ICU 2.2
44 */
45#define U_COMPARE_CODE_POINT_ORDER  0x8000
46#endif
47
48#ifndef USTRING_H
49/**
 * Forward declaration of the C function u_strlen(), emitted only while USTRING_H
 * is undefined (presumably the include guard of unicode/ustring.h -- confirm),
 * so that this header can refer to the function without including that header.
 *
 * @param s The UChar string to measure; see the ustring.h documentation for the
 *          full contract.
50 * \ingroup ustring_ustrlen
51 */
52U_STABLE int32_t U_EXPORT2
53u_strlen(const UChar *s);
54#endif
55
56U_NAMESPACE_BEGIN
57
58class Locale;               // unicode/locid.h
59class StringCharacterIterator;
60class BreakIterator;        // unicode/brkiter.h
61
62/* The <iostream> include has been moved to unicode/ustream.h */
63
64/**
65 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
66 * which constructs a Unicode string from an invariant-character char * string.
67 * About invariant characters see utypes.h.
68 * This constructor has no runtime dependency on conversion code and is
69 * therefore recommended over ones taking a charset name string
70 * (where the empty string "" indicates invariant-character conversion).
71 *
 * The macro expands to the namespace-qualified enumerator UnicodeString::kInvariant,
 * so it can be used without spelling out the ICU namespace.
 *
72 * @stable ICU 3.2
73 */
74#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
75
76/**
77 * Unicode String literals in C++.
78 * Dependent on the platform properties, different UnicodeString
79 * constructors should be used to create a UnicodeString object from
80 * a string literal.
81 * The macros are defined for maximum performance.
82 * They work only for strings that contain "invariant characters", i.e.,
83 * only latin letters, digits, and some punctuation.
84 * See utypes.h for details.
85 *
86 * The string parameter must be a C string literal.
87 * The length of the string, not including the terminating
88 * <code>NUL</code>, must be specified as a constant.
89 * The U_STRING_DECL macro should be invoked exactly once for one
90 * such string variable before it is used.
 * NOTE(review): U_STRING_DECL is neither defined nor used in the visible part of
 * this header; the sentence above may have been carried over from the ustring.h
 * macros -- confirm whether it applies to UNICODE_STRING.
 *
 * Expansion, as selected by the preprocessor conditions below:
 * - U_DECLARE_UTF16 is defined: the result of U_DECLARE_UTF16(cs) is cast to
 *   const UChar * and passed to UnicodeString(TRUE, const UChar *, _length).
 * - wchar_t has the same size as UChar (and the charset family is ASCII, or
 *   UChar is 2 bytes and U_WCHAR_IS_UTF16 is defined): the wide literal L"..."
 *   formed by token pasting is cast and passed to the same constructor.
 * - UChar is 1 byte wide on an ASCII-family platform: the narrow literal itself
 *   is cast and passed to the same constructor.
 * - Otherwise: the char * literal is converted at runtime through the
 *   invariant-character constructor UnicodeString(cs, _length, US_INV).
 * The first three forms presumably create a read-only alias of the literal
 * rather than a copy (first argument TRUE) -- confirm against the constructor
 * documentation.
 *
 * Because one branch pastes <code>L</code> onto <code>cs</code>, the argument must be a
 * single plain string literal token, not an expression or a parenthesized literal.
 *
 * @param cs The C string literal, containing only invariant characters.
 * @param _length The length of the literal in characters, not counting the
 *                terminating NUL.
91 * @stable ICU 2.0
92 */
93#if defined(U_DECLARE_UTF16)
94#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
95#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
96#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
97#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
98#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
99#else
100#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
101#endif
102
103/**
104 * Unicode String literals in C++.
105 * Dependent on the platform properties, different UnicodeString
106 * constructors should be used to create a UnicodeString object from
107 * a string literal.
108 * The macros are defined for improved performance.
109 * They work only for strings that contain "invariant characters", i.e.,
110 * only latin letters, digits, and some punctuation.
111 * See utypes.h for details.
112 *
113 * The string parameter must be a C string literal.
 *
 * This is shorthand for UNICODE_STRING(cs, -1): no explicit length is given.
 * A length of -1 presumably makes the constructor determine the length from the
 * terminating NUL -- confirm against the constructor documentation.
 *
 * @param cs The C string literal, containing only invariant characters.
114 * @stable ICU 2.0
115 */
116#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
117
118/**
119 * UnicodeString is a string class that stores Unicode characters directly and provides
120 * functionality similar to that of the Java String and StringBuffer classes.
121 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
122 *
123 * The UnicodeString class is not suitable for subclassing.
124 *
125 * <p>For an overview of Unicode strings in C and C++ see the
126 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
127 *
128 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
129 * A Unicode character may be stored with either one code unit
130 * (the most common case) or with a matched pair of special code units
131 * ("surrogates"). The data type for code units is UChar.
132 * For single-character handling, a Unicode character code <em>point</em> is a value
133 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
134 *
135 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
136 * This is the same as with multi-byte char* strings in traditional string handling.
137 * Operations on partial strings typically do not test for code point boundaries.
138 * If necessary, the user needs to take care of such boundaries by testing for the code unit
139 * values or by using functions like
140 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
141 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
142 *
143 * UnicodeString methods are more lenient with regard to input parameter values
144 * than other ICU APIs. In particular:
145 * - If indexes are out of bounds for a UnicodeString object
146 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
147 * - If primitive string pointer values (e.g., const UChar * or char *)
148 *   for input strings are NULL, then those input string parameters are treated
149 *   as if they pointed to an empty string.
150 *   However, this is <em>not</em> the case for char * parameters for charset names
151 *   or other IDs.
152 * - Most UnicodeString methods do not take a UErrorCode parameter because
153 *   there are usually very few opportunities for failure other than a shortage
154 *   of memory, error codes in low-level C++ string methods would be inconvenient,
155 *   and the error code as the last parameter (ICU convention) would prevent
156 *   the use of default parameter values.
157 *   Instead, such methods set the UnicodeString into a "bogus" state
158 *   (see isBogus()) if an error occurs.
159 *
160 * In string comparisons, two UnicodeString objects that are both "bogus"
161 * compare equal (to be transitive and prevent endless loops in sorting),
162 * and a "bogus" string compares less than any non-"bogus" one.
163 *
164 * Const UnicodeString methods are thread-safe. Multiple threads can use
165 * const methods on the same UnicodeString object simultaneously,
166 * but non-const methods must not be called concurrently (in multiple threads)
167 * with any other (const or non-const) methods.
168 *
169 * Similarly, const UnicodeString & parameters are thread-safe.
170 * One object may be passed in as such a parameter concurrently in multiple threads.
171 * This includes the const UnicodeString & parameters for
172 * copy construction, assignment, and cloning.
173 *
174 * <p>UnicodeString uses several storage methods.
175 * String contents can be stored inside the UnicodeString object itself,
176 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
177 * Most of this is done transparently, but careful aliasing in particular provides
178 * significant performance improvements.
179 * Also, the internal buffer is accessible via special functions.
180 * For details see the
181 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
182 *
183 * @see utf.h
184 * @see CharacterIterator
185 * @stable ICU 2.0
186 */
187class U_COMMON_API UnicodeString : public Replaceable
188{
189public:
190
191  /**
192   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
193   * which constructs a Unicode string from an invariant-character char * string.
194   * Use the macro US_INV instead of the full qualification for this value.
195   *
   * The type has a single enumerator; it serves only to select that constructor
   * overload.
   *
196   * @see US_INV
197   * @stable ICU 3.2
198   */
199  enum EInvariant {
200    /**
     * The only value of this type; US_INV expands to it.
201     * @see EInvariant
202     * @stable ICU 3.2
203     */
204    kInvariant
205  };
206
207  //========================================
208  // Read-only operations
209  //========================================
210
211  /* Comparison - bitwise only - for international comparison use collation */
212
213  /**
214   * Equality operator. Performs only bitwise comparison.
   * No collation is applied; for case-insensitive comparison see caseCompare().
   * See the class documentation for how "bogus" strings compare.
215   * @param text The UnicodeString to compare to this one.
216   * @return TRUE if <TT>text</TT> contains the same characters as this one,
217   * FALSE otherwise.
218   * @stable ICU 2.0
219   */
220  inline UBool operator== (const UnicodeString& text) const;
221
222  /**
223   * Inequality operator. Performs only bitwise comparison.
   * This is the documented opposite of operator==; no collation is applied.
224   * @param text The UnicodeString to compare to this one.
225   * @return FALSE if <TT>text</TT> contains the same characters as this one,
226   * TRUE otherwise.
227   * @stable ICU 2.0
228   */
229  inline UBool operator!= (const UnicodeString& text) const;
230
231  /**
232   * Greater than operator. Performs only bitwise comparison.
   * The resulting order may differ from code point order when supplementary
   * characters are present; use compareCodePointOrder() for code point order.
   * See the class documentation for the ordering of "bogus" strings.
233   * @param text The UnicodeString to compare to this one.
234   * @return TRUE if the characters in this are bitwise
235   * greater than the characters in <code>text</code>, FALSE otherwise
236   * @stable ICU 2.0
237   */
238  inline UBool operator> (const UnicodeString& text) const;
239
240  /**
241   * Less than operator. Performs only bitwise comparison.
   * The resulting order may differ from code point order when supplementary
   * characters are present; use compareCodePointOrder() for code point order.
   * See the class documentation for the ordering of "bogus" strings.
242   * @param text The UnicodeString to compare to this one.
243   * @return TRUE if the characters in this are bitwise
244   * less than the characters in <code>text</code>, FALSE otherwise
245   * @stable ICU 2.0
246   */
247  inline UBool operator< (const UnicodeString& text) const;
248
249  /**
250   * Greater than or equal operator. Performs only bitwise comparison.
   * The resulting order may differ from code point order when supplementary
   * characters are present; use compareCodePointOrder() for code point order.
251   * @param text The UnicodeString to compare to this one.
252   * @return TRUE if the characters in this are bitwise
253   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
254   * @stable ICU 2.0
255   */
256  inline UBool operator>= (const UnicodeString& text) const;
257
258  /**
259   * Less than or equal operator. Performs only bitwise comparison.
   * The resulting order may differ from code point order when supplementary
   * characters are present; use compareCodePointOrder() for code point order.
260   * @param text The UnicodeString to compare to this one.
261   * @return TRUE if the characters in this are bitwise
262   * less than or equal to the characters in <code>text</code>, FALSE otherwise
263   * @stable ICU 2.0
264   */
265  inline UBool operator<= (const UnicodeString& text) const;
266
267  /**
268   * Compare the characters bitwise in this UnicodeString to
269   * the characters in <code>text</code>.
   * Both strings are compared in their entirety. For code point order use
   * compareCodePointOrder(); for case-insensitive comparison use caseCompare().
270   * @param text The UnicodeString to compare to this one.
271   * @return The result of bitwise character comparison: 0 if this
272   * contains the same characters as <code>text</code>, -1 if the characters in
273   * this are bitwise less than the characters in <code>text</code>, +1 if the
274   * characters in this are bitwise greater than the characters
275   * in <code>text</code>.
276   * @stable ICU 2.0
277   */
278  inline int8_t compare(const UnicodeString& text) const;
279
280  /**
281   * Compare the characters bitwise in the range
282   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
283   * in <TT>text</TT>.
   * The range refers to this string. As stated in the class documentation,
   * offsets and lengths count code units, and out-of-bounds indexes are pinned
   * to the nearest boundary.
284   * @param start the offset in this string at which the compare operation begins
285   * @param length the number of code units in this string to compare.
286   * @param text the other text to be compared against this string.
287   * @return The result of bitwise character comparison: 0 if this
288   * contains the same characters as <code>text</code>, -1 if the characters in
289   * this are bitwise less than the characters in <code>text</code>, +1 if the
290   * characters in this are bitwise greater than the characters
291   * in <code>text</code>.
292   * @stable ICU 2.0
293   */
294  inline int8_t compare(int32_t start,
295         int32_t length,
296         const UnicodeString& text) const;
297
298  /**
299   * Compare the characters bitwise in the range
300   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
301   * in <TT>srcText</TT> in the range
302   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   * As stated in the class documentation, offsets and lengths count code units,
   * and out-of-bounds indexes are pinned to the nearest boundary.
303   * @param start the offset in this string at which the compare operation begins
304   * @param length the number of characters in this to compare.
305   * @param srcText the text to be compared
306   * @param srcStart the offset into <TT>srcText</TT> to start comparison
307   * @param srcLength the number of characters in <TT>srcText</TT> to compare
308   * @return The result of bitwise character comparison: 0 if this
309   * contains the same characters as <code>srcText</code>, -1 if the characters in
310   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
311   * characters in this are bitwise greater than the characters
312   * in <code>srcText</code>.
313   * @stable ICU 2.0
314   */
315   inline int8_t compare(int32_t start,
316         int32_t length,
317         const UnicodeString& srcText,
318         int32_t srcStart,
319         int32_t srcLength) const;
320
321  /**
322   * Compare the characters bitwise in this UnicodeString with the first
323   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
   * Per the class documentation, a NULL <TT>srcChars</TT> is treated as an
   * empty string, and lengths count code units.
324   * @param srcChars The characters to compare to this UnicodeString.
325   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
326   * @return The result of bitwise character comparison: 0 if this
327   * contains the same characters as <code>srcChars</code>, -1 if the characters in
328   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
329   * characters in this are bitwise greater than the characters
330   * in <code>srcChars</code>.
331   * @stable ICU 2.0
332   */
333  inline int8_t compare(const UChar *srcChars,
334         int32_t srcLength) const;
335
336  /**
337   * Compare the characters bitwise in the range
338   * [<TT>start</TT>, <TT>start + length</TT>) with the first
339   * <TT>length</TT> characters in <TT>srcChars</TT>
   * The same <TT>length</TT> applies to both sides, so <TT>srcChars</TT>
   * presumably has to provide at least <TT>length</TT> code units -- confirm.
   * Per the class documentation, a NULL <TT>srcChars</TT> is treated as an
   * empty string.
340   * @param start the offset in this string at which the compare operation begins
341   * @param length the number of characters to compare.
342   * @param srcChars the characters to be compared
343   * @return The result of bitwise character comparison: 0 if this
344   * contains the same characters as <code>srcChars</code>, -1 if the characters in
345   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
346   * characters in this are bitwise greater than the characters
347   * in <code>srcChars</code>.
348   * @stable ICU 2.0
349   */
350  inline int8_t compare(int32_t start,
351         int32_t length,
352         const UChar *srcChars) const;
353
354  /**
355   * Compare the characters bitwise in the range
356   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
357   * in <TT>srcChars</TT> in the range
358   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   * Per the class documentation, a NULL <TT>srcChars</TT> is treated as an
   * empty string, and offsets and lengths count code units.
359   * @param start the offset in this string at which the compare operation begins
360   * @param length the number of characters in this to compare
361   * @param srcChars the characters to be compared
362   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
363   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
364   * @return The result of bitwise character comparison: 0 if this
365   * contains the same characters as <code>srcChars</code>, -1 if the characters in
366   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
367   * characters in this are bitwise greater than the characters
368   * in <code>srcChars</code>.
369   * @stable ICU 2.0
370   */
371  inline int8_t compare(int32_t start,
372         int32_t length,
373         const UChar *srcChars,
374         int32_t srcStart,
375         int32_t srcLength) const;
376
377  /**
378   * Compare the characters bitwise in the range
379   * [<TT>start</TT>, <TT>limit</TT>) with the characters
380   * in <TT>srcText</TT> in the range
381   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
   * Unlike the compare() overloads, both ranges are given as start/limit
   * offsets (the limit is exclusive) rather than as start/length pairs.
382   * @param start the offset in this string at which the compare operation begins
383   * @param limit the offset immediately following the compare operation
384   * @param srcText the text to be compared
385   * @param srcStart the offset into <TT>srcText</TT> to start comparison
386   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
387   * @return The result of bitwise character comparison: 0 if this
388   * contains the same characters as <code>srcText</code>, -1 if the characters in
389   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
390   * characters in this are bitwise greater than the characters
391   * in <code>srcText</code>.
392   * @stable ICU 2.0
393   */
394  inline int8_t compareBetween(int32_t start,
395            int32_t limit,
396            const UnicodeString& srcText,
397            int32_t srcStart,
398            int32_t srcLimit) const;
399
400  /**
401   * Compare two Unicode strings in code point order.
402   * The result may be different from the results of compare(), operator<, etc.
403   * if supplementary characters are present:
404   *
405   * In UTF-16, supplementary characters (with code points U+10000 and above) are
406   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
407   * which means that they compare as less than some other BMP characters like U+feff.
408   * This function compares Unicode strings in code point order.
409   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
410   *
   * Both strings are compared in their entirety.
   *
411   * @param text Another string to compare this one to.
412   * @return a negative/zero/positive integer corresponding to whether
413   * this string is less than/equal to/greater than <code>text</code>
414   * in code point order
415   * @stable ICU 2.0
416   */
417  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
418
419  /**
420   * Compare two Unicode strings in code point order.
421   * The result may be different from the results of compare(), operator<, etc.
422   * if supplementary characters are present:
423   *
424   * In UTF-16, supplementary characters (with code points U+10000 and above) are
425   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
426   * which means that they compare as less than some other BMP characters like U+feff.
427   * This function compares Unicode strings in code point order.
428   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
429   *
   * Only the range [<TT>start</TT>, <TT>start + length</TT>) of this string takes
   * part in the comparison. Per the class documentation, out-of-bounds indexes
   * are pinned to the nearest boundary.
   *
430   * @param start The start offset in this string at which the compare operation begins.
431   * @param length The number of code units from this string to compare.
432   * @param srcText Another string to compare this one to.
433   * @return a negative/zero/positive integer corresponding to whether
434   * this string is less than/equal to/greater than <code>srcText</code>
435   * in code point order
436   * @stable ICU 2.0
437   */
438  inline int8_t compareCodePointOrder(int32_t start,
439                                      int32_t length,
440                                      const UnicodeString& srcText) const;
441
442  /**
443   * Compare two Unicode strings in code point order.
444   * The result may be different from the results of compare(), operator<, etc.
445   * if supplementary characters are present:
446   *
447   * In UTF-16, supplementary characters (with code points U+10000 and above) are
448   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
449   * which means that they compare as less than some other BMP characters like U+feff.
450   * This function compares Unicode strings in code point order.
451   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
452   *
   * The range [<TT>start</TT>, <TT>start + length</TT>) of this string is compared
   * with the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) of
   * <TT>srcText</TT>. Per the class documentation, out-of-bounds indexes are
   * pinned to the nearest boundary.
   *
453   * @param start The start offset in this string at which the compare operation begins.
454   * @param length The number of code units from this string to compare.
455   * @param srcText Another string to compare this one to.
456   * @param srcStart The start offset in that string at which the compare operation begins.
457   * @param srcLength The number of code units from that string to compare.
458   * @return a negative/zero/positive integer corresponding to whether
459   * this string is less than/equal to/greater than <code>srcText</code>
460   * in code point order
461   * @stable ICU 2.0
462   */
463   inline int8_t compareCodePointOrder(int32_t start,
464                                       int32_t length,
465                                       const UnicodeString& srcText,
466                                       int32_t srcStart,
467                                       int32_t srcLength) const;
468
469  /**
470   * Compare two Unicode strings in code point order.
471   * The result may be different from the results of compare(), operator<, etc.
472   * if supplementary characters are present:
473   *
474   * In UTF-16, supplementary characters (with code points U+10000 and above) are
475   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
476   * which means that they compare as less than some other BMP characters like U+feff.
477   * This function compares Unicode strings in code point order.
478   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
479   *
   * All of this string is compared with the first <TT>srcLength</TT> code units
   * of <TT>srcChars</TT>. Per the class documentation, a NULL <TT>srcChars</TT>
   * is treated as an empty string.
   *
480   * @param srcChars A pointer to another string to compare this one to.
481   * @param srcLength The number of code units from that string to compare.
482   * @return a negative/zero/positive integer corresponding to whether
483   * this string is less than/equal to/greater than <code>srcChars</code>
484   * in code point order
485   * @stable ICU 2.0
486   */
487  inline int8_t compareCodePointOrder(const UChar *srcChars,
488                                      int32_t srcLength) const;
489
490  /**
491   * Compare two Unicode strings in code point order.
492   * The result may be different from the results of compare(), operator<, etc.
493   * if supplementary characters are present:
494   *
495   * In UTF-16, supplementary characters (with code points U+10000 and above) are
496   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
497   * which means that they compare as less than some other BMP characters like U+feff.
498   * This function compares Unicode strings in code point order.
499   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
500   *
   * No separate length is passed for <TT>srcChars</TT>; presumably
   * <TT>length</TT> code units of it are used, as documented for
   * compare(start, length, srcChars) -- confirm.
   * Per the class documentation, a NULL <TT>srcChars</TT> is treated as an
   * empty string.
   *
501   * @param start The start offset in this string at which the compare operation begins.
502   * @param length The number of code units from this string to compare.
503   * @param srcChars A pointer to another string to compare this one to.
504   * @return a negative/zero/positive integer corresponding to whether
505   * this string is less than/equal to/greater than <code>srcChars</code>
506   * in code point order
507   * @stable ICU 2.0
508   */
509  inline int8_t compareCodePointOrder(int32_t start,
510                                      int32_t length,
511                                      const UChar *srcChars) const;
512
513  /**
514   * Compare two Unicode strings in code point order.
515   * The result may be different from the results of compare(), operator<, etc.
516   * if supplementary characters are present:
517   *
518   * In UTF-16, supplementary characters (with code points U+10000 and above) are
519   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
520   * which means that they compare as less than some other BMP characters like U+feff.
521   * This function compares Unicode strings in code point order.
522   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
523   *
   * The range [<TT>start</TT>, <TT>start + length</TT>) of this string is compared
   * with the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) of
   * <TT>srcChars</TT>. Per the class documentation, a NULL <TT>srcChars</TT> is
   * treated as an empty string.
   *
524   * @param start The start offset in this string at which the compare operation begins.
525   * @param length The number of code units from this string to compare.
526   * @param srcChars A pointer to another string to compare this one to.
527   * @param srcStart The start offset in that string at which the compare operation begins.
528   * @param srcLength The number of code units from that string to compare.
529   * @return a negative/zero/positive integer corresponding to whether
530   * this string is less than/equal to/greater than <code>srcChars</code>
531   * in code point order
532   * @stable ICU 2.0
533   */
534  inline int8_t compareCodePointOrder(int32_t start,
535                                      int32_t length,
536                                      const UChar *srcChars,
537                                      int32_t srcStart,
538                                      int32_t srcLength) const;
539
540  /**
541   * Compare two Unicode strings in code point order.
542   * The result may be different from the results of compare(), operator<, etc.
543   * if supplementary characters are present:
544   *
545   * In UTF-16, supplementary characters (with code points U+10000 and above) are
546   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
547   * which means that they compare as less than some other BMP characters like U+feff.
548   * This function compares Unicode strings in code point order.
549   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
550   *
   * Unlike the compareCodePointOrder() overloads, both ranges are given as
   * start/limit offsets (the limit is exclusive) rather than as start/length pairs.
   *
551   * @param start The start offset in this string at which the compare operation begins.
552   * @param limit The offset after the last code unit from this string to compare.
553   * @param srcText Another string to compare this one to.
554   * @param srcStart The start offset in that string at which the compare operation begins.
555   * @param srcLimit The offset after the last code unit from that string to compare.
556   * @return a negative/zero/positive integer corresponding to whether
557   * this string is less than/equal to/greater than <code>srcText</code>
558   * in code point order
559   * @stable ICU 2.0
560   */
561  inline int8_t compareCodePointOrderBetween(int32_t start,
562                                             int32_t limit,
563                                             const UnicodeString& srcText,
564                                             int32_t srcStart,
565                                             int32_t srcLimit) const;
566
567  /**
568   * Compare two strings case-insensitively using full case folding.
569   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
570   *
571   * @param text Another string to compare this one to.
572   * @param options A bit set of options:
573   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
574   *     Comparison in code unit order with default case folding.
575   *
576   *   - U_COMPARE_CODE_POINT_ORDER
577   *     Set to choose code point order instead of code unit order
578   *     (see u_strCompare for details).
579   *
580   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *     NOTE(review): this bit is listed without a description here; see the
   *     documentation where the constant is defined.
581   *
582   * @return A negative, zero, or positive integer indicating the comparison result.
583   * @stable ICU 2.0
584   */
585  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
586
587  /**
588   * Compare two strings case-insensitively using full case folding.
589   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   * Only the range [start, start + length) of this string takes part in the
   * comparison.
590   *
591   * @param start The start offset in this string at which the compare operation begins.
592   * @param length The number of code units from this string to compare.
593   * @param srcText Another string to compare this one to.
594   * @param options A bit set of options:
595   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
596   *     Comparison in code unit order with default case folding.
597   *
598   *   - U_COMPARE_CODE_POINT_ORDER
599   *     Set to choose code point order instead of code unit order
600   *     (see u_strCompare for details).
601   *
602   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *     NOTE(review): this bit is listed without a description here; see the
   *     documentation where the constant is defined.
603   *
604   * @return A negative, zero, or positive integer indicating the comparison result.
605   * @stable ICU 2.0
606   */
607  inline int8_t caseCompare(int32_t start,
608         int32_t length,
609         const UnicodeString& srcText,
610         uint32_t options) const;
611
612  /**
613   * Compare two strings case-insensitively using full case folding.
614   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   * Only the range [start, start + length) of this string and the range
   * [srcStart, srcStart + srcLength) of srcText take part in the comparison.
615   *
616   * @param start The start offset in this string at which the compare operation begins.
617   * @param length The number of code units from this string to compare.
618   * @param srcText Another string to compare this one to.
619   * @param srcStart The start offset in that string at which the compare operation begins.
620   * @param srcLength The number of code units from that string to compare.
621   * @param options A bit set of options:
622   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
623   *     Comparison in code unit order with default case folding.
624   *
625   *   - U_COMPARE_CODE_POINT_ORDER
626   *     Set to choose code point order instead of code unit order
627   *     (see u_strCompare for details).
628   *
629   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *     NOTE(review): this bit is listed without a description here; see the
   *     documentation where the constant is defined.
630   *
631   * @return A negative, zero, or positive integer indicating the comparison result.
632   * @stable ICU 2.0
633   */
634  inline int8_t caseCompare(int32_t start,
635         int32_t length,
636         const UnicodeString& srcText,
637         int32_t srcStart,
638         int32_t srcLength,
639         uint32_t options) const;
640
641  /**
642   * Compare two strings case-insensitively using full case folding.
643   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   * (The expression above is notational: srcChars is a const UChar * and has no
   * foldCase() member.)
   * Per the class documentation, a NULL srcChars is treated as an empty string.
644   *
645   * @param srcChars A pointer to another string to compare this one to.
646   * @param srcLength The number of code units from that string to compare.
647   * @param options A bit set of options:
648   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
649   *     Comparison in code unit order with default case folding.
650   *
651   *   - U_COMPARE_CODE_POINT_ORDER
652   *     Set to choose code point order instead of code unit order
653   *     (see u_strCompare for details).
654   *
655   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *     NOTE(review): this bit is listed without a description here; see the
   *     documentation where the constant is defined.
656   *
657   * @return A negative, zero, or positive integer indicating the comparison result.
658   * @stable ICU 2.0
659   */
660  inline int8_t caseCompare(const UChar *srcChars,
661         int32_t srcLength,
662         uint32_t options) const;
663
664  /**
665   * Compare two strings case-insensitively using full case folding.
666   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   * (The expression above is notational: srcChars is a const UChar * and has no
   * foldCase() member.)
   * No separate length is passed for srcChars; presumably length code units of
   * it are used, as documented for compare(start, length, srcChars) -- confirm.
   * Per the class documentation, a NULL srcChars is treated as an empty string.
667   *
668   * @param start The start offset in this string at which the compare operation begins.
669   * @param length The number of code units from this string to compare.
670   * @param srcChars A pointer to another string to compare this one to.
671   * @param options A bit set of options:
672   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
673   *     Comparison in code unit order with default case folding.
674   *
675   *   - U_COMPARE_CODE_POINT_ORDER
676   *     Set to choose code point order instead of code unit order
677   *     (see u_strCompare for details).
678   *
679   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *     NOTE(review): this bit is listed without a description here; see the
   *     documentation where the constant is defined.
680   *
681   * @return A negative, zero, or positive integer indicating the comparison result.
682   * @stable ICU 2.0
683   */
684  inline int8_t caseCompare(int32_t start,
685         int32_t length,
686         const UChar *srcChars,
687         uint32_t options) const;
688
689  /**
690   * Compare two strings case-insensitively using full case folding.
691   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
692   *
693   * @param start The start offset in this string at which the compare operation begins.
694   * @param length The number of code units from this string to compare.
695   * @param srcChars A pointer to another string to compare this one to.
696   * @param srcStart The start offset in that string at which the compare operation begins.
697   * @param srcLength The number of code units from that string to compare.
698   * @param options A bit set of options:
699   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
700   *     Comparison in code unit order with default case folding.
701   *
702   *   - U_COMPARE_CODE_POINT_ORDER
703   *     Set to choose code point order instead of code unit order
704   *     (see u_strCompare for details).
705   *
706   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
707   *
708   * @return A negative, zero, or positive integer indicating the comparison result.
709   * @stable ICU 2.0
710   */
711  inline int8_t caseCompare(int32_t start,
712         int32_t length,
713         const UChar *srcChars,
714         int32_t srcStart,
715         int32_t srcLength,
716         uint32_t options) const;
717
718  /**
719   * Compare two strings case-insensitively using full case folding.
720   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
721   *
722   * @param start The start offset in this string at which the compare operation begins.
723   * @param limit The offset after the last code unit from this string to compare.
724   * @param srcText Another string to compare this one to.
725   * @param srcStart The start offset in that string at which the compare operation begins.
726   * @param srcLimit The offset after the last code unit from that string to compare.
727   * @param options A bit set of options:
728   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
729   *     Comparison in code unit order with default case folding.
730   *
731   *   - U_COMPARE_CODE_POINT_ORDER
732   *     Set to choose code point order instead of code unit order
733   *     (see u_strCompare for details).
734   *
735   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
736   *
737   * @return A negative, zero, or positive integer indicating the comparison result.
738   * @stable ICU 2.0
739   */
740  inline int8_t caseCompareBetween(int32_t start,
741            int32_t limit,
742            const UnicodeString& srcText,
743            int32_t srcStart,
744            int32_t srcLimit,
745            uint32_t options) const;
746
747  /**
748   * Determine if this starts with the characters in <TT>text</TT>
749   * @param text The text to match.
750   * @return TRUE if this starts with the characters in <TT>text</TT>,
751   * FALSE otherwise
752   * @stable ICU 2.0
753   */
754  inline UBool startsWith(const UnicodeString& text) const;
755
756  /**
757   * Determine if this starts with the characters in <TT>srcText</TT>
758   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
759   * @param srcText The text to match.
760   * @param srcStart the offset into <TT>srcText</TT> to start matching
761   * @param srcLength the number of characters in <TT>srcText</TT> to match
762   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
763   * FALSE otherwise
764   * @stable ICU 2.0
765   */
766  inline UBool startsWith(const UnicodeString& srcText,
767            int32_t srcStart,
768            int32_t srcLength) const;
769
770  /**
771   * Determine if this starts with the characters in <TT>srcChars</TT>
772   * @param srcChars The characters to match.
773   * @param srcLength the number of characters in <TT>srcChars</TT>
774   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
775   * FALSE otherwise
776   * @stable ICU 2.0
777   */
778  inline UBool startsWith(const UChar *srcChars,
779            int32_t srcLength) const;
780
781  /**
782   * Determine if this starts with the characters in <TT>srcChars</TT>
783   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
784   * @param srcChars The characters to match.
785   * @param srcStart the offset into <TT>srcChars</TT> to start matching
786   * @param srcLength the number of characters in <TT>srcChars</TT> to match
787   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
788   * @stable ICU 2.0
789   */
790  inline UBool startsWith(const UChar *srcChars,
791            int32_t srcStart,
792            int32_t srcLength) const;
793
794  /**
795   * Determine if this ends with the characters in <TT>text</TT>
796   * @param text The text to match.
797   * @return TRUE if this ends with the characters in <TT>text</TT>,
798   * FALSE otherwise
799   * @stable ICU 2.0
800   */
801  inline UBool endsWith(const UnicodeString& text) const;
802
803  /**
804   * Determine if this ends with the characters in <TT>srcText</TT>
805   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
806   * @param srcText The text to match.
807   * @param srcStart the offset into <TT>srcText</TT> to start matching
808   * @param srcLength the number of characters in <TT>srcText</TT> to match
809   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
810   * FALSE otherwise
811   * @stable ICU 2.0
812   */
813  inline UBool endsWith(const UnicodeString& srcText,
814          int32_t srcStart,
815          int32_t srcLength) const;
816
817  /**
818   * Determine if this ends with the characters in <TT>srcChars</TT>
819   * @param srcChars The characters to match.
820   * @param srcLength the number of characters in <TT>srcChars</TT>
821   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
822   * FALSE otherwise
823   * @stable ICU 2.0
824   */
825  inline UBool endsWith(const UChar *srcChars,
826          int32_t srcLength) const;
827
828  /**
829   * Determine if this ends with the characters in <TT>srcChars</TT>
830   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
831   * @param srcChars The characters to match.
832   * @param srcStart the offset into <TT>srcChars</TT> to start matching
833   * @param srcLength the number of characters in <TT>srcChars</TT> to match
834   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
835   * FALSE otherwise
836   * @stable ICU 2.0
837   */
838  inline UBool endsWith(const UChar *srcChars,
839          int32_t srcStart,
840          int32_t srcLength) const;
841
842
843  /* Searching - bitwise only */
844
845  /**
846   * Locate in this the first occurrence of the characters in <TT>text</TT>,
847   * using bitwise comparison.
848   * @param text The text to search for.
849   * @return The offset into this of the start of <TT>text</TT>,
850   * or -1 if not found.
851   * @stable ICU 2.0
852   */
853  inline int32_t indexOf(const UnicodeString& text) const;
854
855  /**
856   * Locate in this the first occurrence of the characters in <TT>text</TT>
857   * starting at offset <TT>start</TT>, using bitwise comparison.
858   * @param text The text to search for.
859   * @param start The offset at which searching will start.
860   * @return The offset into this of the start of <TT>text</TT>,
861   * or -1 if not found.
862   * @stable ICU 2.0
863   */
864  inline int32_t indexOf(const UnicodeString& text,
865              int32_t start) const;
866
867  /**
868   * Locate in this the first occurrence in the range
869   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
870   * in <TT>text</TT>, using bitwise comparison.
871   * @param text The text to search for.
872   * @param start The offset at which searching will start.
873   * @param length The number of characters to search
874   * @return The offset into this of the start of <TT>text</TT>,
875   * or -1 if not found.
876   * @stable ICU 2.0
877   */
878  inline int32_t indexOf(const UnicodeString& text,
879              int32_t start,
880              int32_t length) const;
881
882  /**
883   * Locate in this the first occurrence in the range
884   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
885   *  in <TT>srcText</TT> in the range
886   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
887   * using bitwise comparison.
888   * @param srcText The text to search for.
889   * @param srcStart the offset into <TT>srcText</TT> at which
890   * to start matching
891   * @param srcLength the number of characters in <TT>srcText</TT> to match
892   * @param start the offset into this at which to start matching
893   * @param length the number of characters in this to search
894   * @return The offset into this of the start of <TT>srcText</TT>,
895   * or -1 if not found.
896   * @stable ICU 2.0
897   */
898  inline int32_t indexOf(const UnicodeString& srcText,
899              int32_t srcStart,
900              int32_t srcLength,
901              int32_t start,
902              int32_t length) const;
903
904  /**
905   * Locate in this the first occurrence of the characters in
906   * <TT>srcChars</TT>
907   * starting at offset <TT>start</TT>, using bitwise comparison.
908   * @param srcChars The text to search for.
909   * @param srcLength the number of characters in <TT>srcChars</TT> to match
910   * @param start the offset into this at which to start matching
911   * @return The offset into this of the start of <TT>srcChars</TT>,
912   * or -1 if not found.
913   * @stable ICU 2.0
914   */
915  inline int32_t indexOf(const UChar *srcChars,
916              int32_t srcLength,
917              int32_t start) const;
918
919  /**
920   * Locate in this the first occurrence in the range
921   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
922   * in <TT>srcChars</TT>, using bitwise comparison.
923   * @param srcChars The text to search for.
924   * @param srcLength the number of characters in <TT>srcChars</TT>
925   * @param start The offset at which searching will start.
926   * @param length The number of characters to search
927   * @return The offset into this of the start of <TT>srcChars</TT>,
928   * or -1 if not found.
929   * @stable ICU 2.0
930   */
931  inline int32_t indexOf(const UChar *srcChars,
932              int32_t srcLength,
933              int32_t start,
934              int32_t length) const;
935
936  /**
937   * Locate in this the first occurrence in the range
938   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
939   * in <TT>srcChars</TT> in the range
940   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
941   * using bitwise comparison.
942   * @param srcChars The text to search for.
943   * @param srcStart the offset into <TT>srcChars</TT> at which
944   * to start matching
945   * @param srcLength the number of characters in <TT>srcChars</TT> to match
946   * @param start the offset into this at which to start matching
947   * @param length the number of characters in this to search
948   * @return The offset into this of the start of <TT>srcChars</TT>,
949   * or -1 if not found.
950   * @stable ICU 2.0
951   */
952  int32_t indexOf(const UChar *srcChars,
953              int32_t srcStart,
954              int32_t srcLength,
955              int32_t start,
956              int32_t length) const;
957
958  /**
959   * Locate in this the first occurrence of the BMP code point <code>c</code>,
960   * using bitwise comparison.
961   * @param c The code unit to search for.
962   * @return The offset into this of <TT>c</TT>, or -1 if not found.
963   * @stable ICU 2.0
964   */
965  inline int32_t indexOf(UChar c) const;
966
967  /**
968   * Locate in this the first occurrence of the code point <TT>c</TT>,
969   * using bitwise comparison.
970   *
971   * @param c The code point to search for.
972   * @return The offset into this of <TT>c</TT>, or -1 if not found.
973   * @stable ICU 2.0
974   */
975  inline int32_t indexOf(UChar32 c) const;
976
977  /**
978   * Locate in this the first occurrence of the BMP code point <code>c</code>,
979   * starting at offset <TT>start</TT>, using bitwise comparison.
980   * @param c The code unit to search for.
981   * @param start The offset at which searching will start.
982   * @return The offset into this of <TT>c</TT>, or -1 if not found.
983   * @stable ICU 2.0
984   */
985  inline int32_t indexOf(UChar c,
986              int32_t start) const;
987
988  /**
989   * Locate in this the first occurrence of the code point <TT>c</TT>
990   * starting at offset <TT>start</TT>, using bitwise comparison.
991   *
992   * @param c The code point to search for.
993   * @param start The offset at which searching will start.
994   * @return The offset into this of <TT>c</TT>, or -1 if not found.
995   * @stable ICU 2.0
996   */
997  inline int32_t indexOf(UChar32 c,
998              int32_t start) const;
999
1000  /**
1001   * Locate in this the first occurrence of the BMP code point <code>c</code>
1002   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1003   * using bitwise comparison.
1004   * @param c The code unit to search for.
1005   * @param start the offset into this at which to start matching
1006   * @param length the number of characters in this to search
1007   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1008   * @stable ICU 2.0
1009   */
1010  inline int32_t indexOf(UChar c,
1011              int32_t start,
1012              int32_t length) const;
1013
1014  /**
1015   * Locate in this the first occurrence of the code point <TT>c</TT>
1016   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1017   * using bitwise comparison.
1018   *
1019   * @param c The code point to search for.
1020   * @param start the offset into this at which to start matching
1021   * @param length the number of characters in this to search
1022   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1023   * @stable ICU 2.0
1024   */
1025  inline int32_t indexOf(UChar32 c,
1026              int32_t start,
1027              int32_t length) const;
1028
1029  /**
1030   * Locate in this the last occurrence of the characters in <TT>text</TT>,
1031   * using bitwise comparison.
1032   * @param text The text to search for.
1033   * @return The offset into this of the start of <TT>text</TT>,
1034   * or -1 if not found.
1035   * @stable ICU 2.0
1036   */
1037  inline int32_t lastIndexOf(const UnicodeString& text) const;
1038
1039  /**
1040   * Locate in this the last occurrence of the characters in <TT>text</TT>
1041   * starting at offset <TT>start</TT>, using bitwise comparison.
1042   * @param text The text to search for.
1043   * @param start The offset at which searching will start.
1044   * @return The offset into this of the start of <TT>text</TT>,
1045   * or -1 if not found.
1046   * @stable ICU 2.0
1047   */
1048  inline int32_t lastIndexOf(const UnicodeString& text,
1049              int32_t start) const;
1050
1051  /**
1052   * Locate in this the last occurrence in the range
1053   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1054   * in <TT>text</TT>, using bitwise comparison.
1055   * @param text The text to search for.
1056   * @param start The offset at which searching will start.
1057   * @param length The number of characters to search
1058   * @return The offset into this of the start of <TT>text</TT>,
1059   * or -1 if not found.
1060   * @stable ICU 2.0
1061   */
1062  inline int32_t lastIndexOf(const UnicodeString& text,
1063              int32_t start,
1064              int32_t length) const;
1065
1066  /**
1067   * Locate in this the last occurrence in the range
1068   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1069   * in <TT>srcText</TT> in the range
1070   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1071   * using bitwise comparison.
1072   * @param srcText The text to search for.
1073   * @param srcStart the offset into <TT>srcText</TT> at which
1074   * to start matching
1075   * @param srcLength the number of characters in <TT>srcText</TT> to match
1076   * @param start the offset into this at which to start matching
1077   * @param length the number of characters in this to search
1078   * @return The offset into this of the start of <TT>srcText</TT>,
1079   * or -1 if not found.
1080   * @stable ICU 2.0
1081   */
1082  inline int32_t lastIndexOf(const UnicodeString& srcText,
1083              int32_t srcStart,
1084              int32_t srcLength,
1085              int32_t start,
1086              int32_t length) const;
1087
1088  /**
1089   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1090   * starting at offset <TT>start</TT>, using bitwise comparison.
1091   * @param srcChars The text to search for.
1092   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1093   * @param start the offset into this at which to start matching
1094   * @return The offset into this of the start of <TT>srcChars</TT>,
1095   * or -1 if not found.
1096   * @stable ICU 2.0
1097   */
1098  inline int32_t lastIndexOf(const UChar *srcChars,
1099              int32_t srcLength,
1100              int32_t start) const;
1101
1102  /**
1103   * Locate in this the last occurrence in the range
1104   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1105   * in <TT>srcChars</TT>, using bitwise comparison.
1106   * @param srcChars The text to search for.
1107   * @param srcLength the number of characters in <TT>srcChars</TT>
1108   * @param start The offset at which searching will start.
1109   * @param length The number of characters to search
1110   * @return The offset into this of the start of <TT>srcChars</TT>,
1111   * or -1 if not found.
1112   * @stable ICU 2.0
1113   */
1114  inline int32_t lastIndexOf(const UChar *srcChars,
1115              int32_t srcLength,
1116              int32_t start,
1117              int32_t length) const;
1118
1119  /**
1120   * Locate in this the last occurrence in the range
1121   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1122   * in <TT>srcChars</TT> in the range
1123   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1124   * using bitwise comparison.
1125   * @param srcChars The text to search for.
1126   * @param srcStart the offset into <TT>srcChars</TT> at which
1127   * to start matching
1128   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1129   * @param start the offset into this at which to start matching
1130   * @param length the number of characters in this to search
1131   * @return The offset into this of the start of <TT>srcChars</TT>,
1132   * or -1 if not found.
1133   * @stable ICU 2.0
1134   */
1135  int32_t lastIndexOf(const UChar *srcChars,
1136              int32_t srcStart,
1137              int32_t srcLength,
1138              int32_t start,
1139              int32_t length) const;
1140
1141  /**
1142   * Locate in this the last occurrence of the BMP code point <code>c</code>,
1143   * using bitwise comparison.
1144   * @param c The code unit to search for.
1145   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1146   * @stable ICU 2.0
1147   */
1148  inline int32_t lastIndexOf(UChar c) const;
1149
1150  /**
1151   * Locate in this the last occurrence of the code point <TT>c</TT>,
1152   * using bitwise comparison.
1153   *
1154   * @param c The code point to search for.
1155   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1156   * @stable ICU 2.0
1157   */
1158  inline int32_t lastIndexOf(UChar32 c) const;
1159
1160  /**
1161   * Locate in this the last occurrence of the BMP code point <code>c</code>
1162   * starting at offset <TT>start</TT>, using bitwise comparison.
1163   * @param c The code unit to search for.
1164   * @param start The offset at which searching will start.
1165   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1166   * @stable ICU 2.0
1167   */
1168  inline int32_t lastIndexOf(UChar c,
1169              int32_t start) const;
1170
1171  /**
1172   * Locate in this the last occurrence of the code point <TT>c</TT>
1173   * starting at offset <TT>start</TT>, using bitwise comparison.
1174   *
1175   * @param c The code point to search for.
1176   * @param start The offset at which searching will start.
1177   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1178   * @stable ICU 2.0
1179   */
1180  inline int32_t lastIndexOf(UChar32 c,
1181              int32_t start) const;
1182
1183  /**
1184   * Locate in this the last occurrence of the BMP code point <code>c</code>
1185   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1186   * using bitwise comparison.
1187   * @param c The code unit to search for.
1188   * @param start the offset into this at which to start matching
1189   * @param length the number of characters in this to search
1190   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1191   * @stable ICU 2.0
1192   */
1193  inline int32_t lastIndexOf(UChar c,
1194              int32_t start,
1195              int32_t length) const;
1196
1197  /**
1198   * Locate in this the last occurrence of the code point <TT>c</TT>
1199   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1200   * using bitwise comparison.
1201   *
1202   * @param c The code point to search for.
1203   * @param start the offset into this at which to start matching
1204   * @param length the number of characters in this to search
1205   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1206   * @stable ICU 2.0
1207   */
1208  inline int32_t lastIndexOf(UChar32 c,
1209              int32_t start,
1210              int32_t length) const;
1211
1212
1213  /* Character access */
1214
1215  /**
1216   * Return the code unit at offset <tt>offset</tt>.
1217   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1218   * @param offset a valid offset into the text
1219   * @return the code unit at offset <tt>offset</tt>
1220   *         or 0xffff if the offset is not valid for this string
1221   * @stable ICU 2.0
1222   */
1223  inline UChar charAt(int32_t offset) const;
1224
1225  /**
1226   * Return the code unit at offset <tt>offset</tt>.
1227   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1228   * @param offset a valid offset into the text
1229   * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid
1230   * @stable ICU 2.0
1231   */
1232  inline UChar operator[] (int32_t offset) const;
1233
1234  /**
1235   * Return the code point that contains the code unit
1236   * at offset <tt>offset</tt>.
1237   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1238   * @param offset a valid offset into the text
1239   * that indicates the text offset of any of the code units
1240   * that will be assembled into a code point (21-bit value) and returned
1241   * @return the code point of text at <tt>offset</tt>
1242   *         or 0xffff if the offset is not valid for this string
1243   * @stable ICU 2.0
1244   */
1245  inline UChar32 char32At(int32_t offset) const;
1246
1247  /**
1248   * Adjust a random-access offset so that
1249   * it points to the beginning of a Unicode character.
1250   * The offset that is passed in points to
1251   * any code unit of a code point,
1252   * while the returned offset will point to the first code unit
1253   * of the same code point.
1254   * In UTF-16, if the input offset points to a second surrogate
1255   * of a surrogate pair, then the returned offset will point
1256   * to the first surrogate.
1257   * @param offset a valid offset into one code point of the text
1258   * @return offset of the first code unit of the same code point
1259   * @see U16_SET_CP_START
1260   * @stable ICU 2.0
1261   */
1262  inline int32_t getChar32Start(int32_t offset) const;
1263
1264  /**
1265   * Adjust a random-access offset so that
1266   * it points behind a Unicode character.
1267   * The offset that is passed in points behind
1268   * any code unit of a code point,
1269   * while the returned offset will point behind the last code unit
1270   * of the same code point.
1271   * In UTF-16, if the input offset points behind the first surrogate
1272   * (i.e., to the second surrogate)
1273   * of a surrogate pair, then the returned offset will point
1274   * behind the second surrogate (i.e., to the code unit after the pair).
1275   * @param offset a valid offset after any code unit of a code point of the text
1276   * @return offset of the first code unit after the same code point
1277   * @see U16_SET_CP_LIMIT
1278   * @stable ICU 2.0
1279   */
1280  inline int32_t getChar32Limit(int32_t offset) const;
1281
1282  /**
1283   * Move the code unit index along the string by delta code points.
1284   * Interpret the input index as a code unit-based offset into the string,
1285   * move the index forward or backward by delta code points, and
1286   * return the resulting index.
1287   * The input index should point to the first code unit of a code point,
1288   * if there is more than one.
1289   *
1290   * Both input and output indexes are code unit-based as for all
1291   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1292   * If delta<0 then the index is moved backward (toward the start of the string).
1293   * If delta>0 then the index is moved forward (toward the end of the string).
1294   *
1295   * This behaves like CharacterIterator::move32(delta, kCurrent).
1296   *
1297   * Behavior for out-of-bounds indexes:
1298   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1299   * if the input index<0 then it is pinned to 0;
1300   * if it is index>length() then it is pinned to length().
1301   * Afterwards, the index is moved by <code>delta</code> code points
1302   * forward or backward,
1303   * but no further backward than to 0 and no further forward than to length().
1304   * The resulting index return value will be in between 0 and length(), inclusively.
1305   *
1306   * Examples:
1307   * <pre>
1308   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1309   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1310   *
1311   * // initial index: position of U+10000
1312   * int32_t index=1;
1313   *
1314   * // the following examples will all result in index==4, position of U+10ffff
1315   *
1316   * // skip 2 code points from some position in the string
1317   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1318   *
1319   * // go to the 3rd code point from the start of s (0-based)
1320   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1321   *
1322   * // go to the next-to-last code point of s
1323   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1324   * </pre>
1325   *
1326   * @param index input code unit index
1327   * @param delta (signed) code point count to move the index forward or backward
1328   *        in the string
1329   * @return the resulting code unit index
1330   * @stable ICU 2.0
1331   */
1332  int32_t moveIndex32(int32_t index, int32_t delta) const;
1333
1334  /* Substring extraction */
1335
1336  /**
1337   * Copy the characters in the range
1338   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1339   * beginning at <tt>dstStart</tt>.
1340   * If the string aliases to <code>dst</code> itself as an external buffer,
1341   * then extract() will not copy the contents.
1342   *
1343   * @param start offset of first character which will be copied into the array
1344   * @param length the number of characters to extract
1345   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1346   * must be at least (<tt>dstStart + length</tt>).
1347   * @param dstStart the offset in <TT>dst</TT> where the first character
1348   * will be extracted
1349   * @stable ICU 2.0
1350   */
1351  inline void extract(int32_t start,
1352           int32_t length,
1353           UChar *dst,
1354           int32_t dstStart = 0) const;
1355
1356  /**
1357   * Copy the contents of the string into dest.
1358   * This is a convenience function that
1359   * checks if there is enough space in dest,
1360   * extracts the entire string if possible,
1361   * and NUL-terminates dest if possible.
1362   *
1363   * If the string fits into dest but cannot be NUL-terminated
1364   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1365   * If the string itself does not fit into dest
1366   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1367   *
1368   * If the string aliases to <code>dest</code> itself as an external buffer,
1369   * then extract() will not copy the contents.
1370   *
1371   * @param dest Destination string buffer.
1372   * @param destCapacity Number of UChars available at dest.
1373   * @param errorCode ICU error code.
1374   * @return length()
1375   * @stable ICU 2.0
1376   */
1377  int32_t
1378  extract(UChar *dest, int32_t destCapacity,
1379          UErrorCode &errorCode) const;
1380
1381  /**
1382   * Copy the characters in the range
1383   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1384   * <tt>target</tt>.
1385   * @param start offset of first character which will be copied
1386   * @param length the number of characters to extract
1387   * @param target UnicodeString into which to copy characters;
1388   *        it receives the result, as this function returns nothing.
1389   * @stable ICU 2.0
1390   */
1391  inline void extract(int32_t start,
1392           int32_t length,
1393           UnicodeString& target) const;
1394
1395  /**
1396   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1397   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1398   * @param start offset of first character which will be copied into the array
1399   * @param limit offset immediately following the last character to be copied
1400   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1401   * must be at least (<tt>dstStart + (limit - start)</tt>).
1402   * @param dstStart the offset in <TT>dst</TT> where the first character
1403   * will be extracted
1404   * @stable ICU 2.0
1405   */
1406  inline void extractBetween(int32_t start,
1407              int32_t limit,
1408              UChar *dst,
1409              int32_t dstStart = 0) const;
1410
1411  /**
1412   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1413   * into the UnicodeString <tt>target</tt>.  Replaceable API.
1414   * @param start offset of first character which will be copied
1415   * @param limit offset immediately following the last character to be copied
1416   * @param target UnicodeString into which to copy characters;
1417   *        it receives the result, as this function returns nothing.
1418   * @stable ICU 2.0
1419   */
1420  virtual void extractBetween(int32_t start,
1421              int32_t limit,
1422              UnicodeString& target) const;
1423
1424  /**
1425   * Copy the characters in the range
1426   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1427   * All characters must be invariant (see utypes.h).
1428   * Use US_INV as the last, signature-distinguishing parameter.
1429   *
1430   * This function does not write any more than <code>targetCapacity</code>
1431   * characters but returns the length of the entire output string
1432   * so that one can allocate a larger buffer and call the function again
1433   * if necessary.
1434   * The output string is NUL-terminated if possible.
1435   *
1436   * @param start offset of first character which will be copied
1437   * @param startLength the number of characters to extract
1438   * @param target the target buffer for extraction, can be NULL
1439   *               if targetCapacity is 0
1440   * @param targetCapacity the length of the target buffer
1441   * @param inv Signature-distinguishing parameter, use US_INV.
1442   * @return the output string length, not including the terminating NUL
1443   * @stable ICU 3.2
1444   */
1445  int32_t extract(int32_t start,
1446           int32_t startLength,
1447           char *target,
1448           int32_t targetCapacity,
1449           enum EInvariant inv) const;
1450
1451#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1452
1453  /**
1454   * Copy the characters in the range
1455   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1456   * in the platform's default codepage.
1457   * This function does not write any more than <code>targetLength</code>
1458   * characters but returns the length of the entire output string
1459   * so that one can allocate a larger buffer and call the function again
1460   * if necessary.
1461   * The output string is NUL-terminated if possible.
1462   *
1463   * @param start offset of first character which will be copied
1464   * @param startLength the number of characters to extract
1465   * @param target the target buffer for extraction
1466   * @param targetLength the length of the target buffer
1467   * If <TT>target</TT> is NULL, then the number of bytes required for
1468   * <TT>target</TT> is returned.
1469   * @return the output string length, not including the terminating NUL
1470   * @stable ICU 2.0
1471   */
1472  int32_t extract(int32_t start,
1473           int32_t startLength,
1474           char *target,
1475           uint32_t targetLength) const;
1476
1477#endif
1478
1479#if !UCONFIG_NO_CONVERSION
1480
1481  /**
1482   * Copy the characters in the range
1483   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1484   * in a specified codepage.
1485   * The output string is NUL-terminated.
1486   *
1487   * Recommendation: For invariant-character strings use
1488   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1489   * because it avoids object code dependencies of UnicodeString on
1490   * the conversion code.
1491   *
1492   * @param start offset of first character which will be copied
1493   * @param startLength the number of characters to extract
1494   * @param target the target buffer for extraction
1495   * @param codepage the desired codepage for the characters.  0 has
1496   * the special meaning of the default codepage
1497   * If <code>codepage</code> is an empty string (<code>""</code>),
1498   * then a simple conversion is performed on the codepage-invariant
1499   * subset ("invariant characters") of the platform encoding. See utypes.h.
1500   * If <TT>target</TT> is NULL, then the number of bytes required for
1501   * <TT>target</TT> is returned. It is assumed that the target is big enough
1502   * to fit all of the characters.
1503   * @return the output string length, not including the terminating NUL
1504   * @stable ICU 2.0
1505   */
1506  inline int32_t extract(int32_t start,
1507                 int32_t startLength,
1508                 char *target,
1509                 const char *codepage = 0) const;
1510
1511  /**
1512   * Copy the characters in the range
1513   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1514   * in a specified codepage.
1515   * This function does not write any more than <code>targetLength</code>
1516   * characters but returns the length of the entire output string
1517   * so that one can allocate a larger buffer and call the function again
1518   * if necessary.
1519   * The output string is NUL-terminated if possible.
1520   *
1521   * Recommendation: For invariant-character strings use
1522   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1523   * because it avoids object code dependencies of UnicodeString on
1524   * the conversion code.
1525   *
1526   * @param start offset of first character which will be copied
1527   * @param startLength the number of characters to extract
1528   * @param target the target buffer for extraction
1529   * @param targetLength the length of the target buffer
1530   * @param codepage the desired codepage for the characters.  0 has
1531   * the special meaning of the default codepage
1532   * If <code>codepage</code> is an empty string (<code>""</code>),
1533   * then a simple conversion is performed on the codepage-invariant
1534   * subset ("invariant characters") of the platform encoding. See utypes.h.
1535   * If <TT>target</TT> is NULL, then the number of bytes required for
1536   * <TT>target</TT> is returned.
1537   * @return the output string length, not including the terminating NUL
1538   * @stable ICU 2.0
1539   */
1540  int32_t extract(int32_t start,
1541           int32_t startLength,
1542           char *target,
1543           uint32_t targetLength,
1544           const char *codepage) const;
1545
1546  /**
1547   * Convert the UnicodeString into a codepage string using an existing UConverter.
1548   * The output string is NUL-terminated if possible.
1549   *
1550   * This function avoids the overhead of opening and closing a converter if
1551   * multiple strings are extracted.
1552   *
1553   * @param dest destination string buffer, can be NULL if destCapacity==0
1554   * @param destCapacity the number of chars available at dest
1555   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1556   *        or NULL for the default converter
1557   * @param errorCode normal ICU error code
1558   * @return the length of the output string, not counting the terminating NUL;
1559   *         if the length is greater than destCapacity, then the string will not fit
1560   *         and a buffer of the indicated length would need to be passed in
1561   * @stable ICU 2.0
1562   */
1563  int32_t extract(char *dest, int32_t destCapacity,
1564                  UConverter *cnv,
1565                  UErrorCode &errorCode) const;
1566
1567#endif
1568
1569  /**
1570   * Create a temporary substring for the specified range.
1571   * Unlike the substring constructor and setTo() functions,
1572   * the object returned here will be a read-only alias (using getBuffer())
1573   * rather than copying the text.
1574   * As a result, this substring operation is much faster but requires
1575   * that the original string not be modified or deleted during the lifetime
1576   * of the returned substring object.
1577   * @param start offset of the first character visible in the substring
1578   * @param length length of the substring
1579   * @return a read-only alias UnicodeString object for the substring
1580   * @stable ICU 4.4
1581   */
1582  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1583
1584  /**
1585   * Create a temporary substring for the specified range.
1586   * Same as tempSubString(start, length) except that the substring range
1587   * is specified as a (start, limit) pair (with an exclusive limit index)
1588   * rather than a (start, length) pair.
1589   * @param start offset of the first character visible in the substring
1590   * @param limit offset immediately following the last character visible in the substring
1591   * @return a read-only alias UnicodeString object for the substring
1592   * @stable ICU 4.4
1593   */
1594  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1595
1596  /**
1597   * Convert the UnicodeString to UTF-8 and write the result
1598   * to a ByteSink. This is called by toUTF8String().
1599   * Unpaired surrogates are replaced with U+FFFD.
1600   * Calls u_strToUTF8WithSub().
1601   *
1602   * @param sink A ByteSink to which the UTF-8 version of the string is written.
1603   *             sink.Flush() is called at the end.
1604   * @stable ICU 4.2
1605   * @see toUTF8String
1606   */
1607  void toUTF8(ByteSink &sink) const;
1608
1609#if U_HAVE_STD_STRING
1610
1611  /**
1612   * Append the UTF-8 form of this UnicodeString to a standard string.
1613   * The conversion is delegated to toUTF8() through a StringByteSink
1614   * that wraps <code>result</code>.
1615   * Unpaired surrogates are replaced with U+FFFD.
1616   *
1617   * @param result A standard string (or a compatible object)
1618   *        that receives the appended UTF-8 bytes.
1619   * @return The same <code>result</code> object that was passed in.
1620   * @stable ICU 4.2
1621   * @see toUTF8
1622   */
1623  template<typename StringClass>
1624  StringClass &toUTF8String(StringClass &result) const {
1625    StringByteSink<StringClass> utf8Sink(&result);
1626    toUTF8(utf8Sink);
1627    return result;
1628  }
1629
1630#endif
1631
1632  /**
1633   * Convert the UnicodeString to UTF-32.
1634   * Unpaired surrogates are replaced with U+FFFD.
1635   * Calls u_strToUTF32WithSub().
1636   *
1637   * @param utf32 destination string buffer, can be NULL if capacity==0
1638   * @param capacity the number of UChar32s available at utf32
1639   * @param errorCode Standard ICU error code. Its input value must
1640   *                  pass the U_SUCCESS() test, or else the function returns
1641   *                  immediately. Check for U_FAILURE() on output or use with
1642   *                  function chaining. (See User Guide for details.)
1643   * @return The length of the UTF-32 string.
1644   * @see fromUTF32
1645   * @stable ICU 4.2
1646   */
1647  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1648
1649  /* Length operations */
1650
1651  /**
1652   * Return the length of the UnicodeString object.
1653   * The length is the number of UChar code units in the UnicodeString.
1654   * If you want the number of code points, please use countChar32().
1655   * @return the length of the UnicodeString object
1656   * @see countChar32
1657   * @stable ICU 2.0
1658   */
1659  inline int32_t length(void) const;
1660
1661  /**
1662   * Count Unicode code points in the length UChar code units of the string.
1663   * A code point may occupy either one or two UChar code units.
1664   * Counting code points involves reading all code units.
1665   *
1666   * This function is basically the inverse of moveIndex32().
1667   *
1668   * @param start the index of the first code unit to check (defaults to 0)
1669   * @param length the number of UChar code units to check (defaults to INT32_MAX)
1670   * @return the number of code points in the specified code units
1671   * @see length
1672   * @stable ICU 2.0
1673   */
1674  int32_t
1675  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1676
1677  /**
1678   * Check if the length UChar code units of the string
1679   * contain more Unicode code points than a certain number.
1680   * This is more efficient than counting all code points in this part of the string
1681   * and comparing that number with a threshold.
1682   * This function may not need to scan the string at all if the length
1683   * falls within a certain range, and
1684   * never needs to count more than 'number+1' code points.
1685   * Logically equivalent to (countChar32(start, length)>number).
1686   * A Unicode code point may occupy either one or two UChar code units.
1687   *
1688   * @param start the index of the first code unit to check (0 for the entire string)
1689   * @param length the number of UChar code units to check
1690   *               (use INT32_MAX for the entire string; remember that start/length
1691   *                values are pinned)
1692   * @param number The number of code points in the (sub)string is compared against
1693   *               the 'number' parameter.
1694   * @return Boolean value for whether the string contains more Unicode code points
1695   *         than 'number'. Same as (u_countChar32(s, length)>number).
1696   * @see countChar32
1697   * @see u_strHasMoreChar32Than
1698   * @stable ICU 2.4
1699   */
1700  UBool
1701  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1702
1703  /**
1704   * Determine if this string is empty.
1705   * @return TRUE if this string contains 0 characters, FALSE otherwise.
1706   * @stable ICU 2.0
1707   */
1708  inline UBool isEmpty(void) const;
1709
1710  /**
1711   * Return the capacity of the internal buffer of the UnicodeString object.
1712   * This is useful together with the getBuffer functions.
1713   * See there for details.
1714   *
1715   * @return the number of UChars available in the internal buffer
1716   * @see getBuffer
1717   * @stable ICU 2.0
1718   */
1719  inline int32_t getCapacity(void) const;
1720
1721  /* Other operations */
1722
1723  /**
1724   * Generate a hash code for this object.
1725   * @return The hash code of this UnicodeString.
1726   * @stable ICU 2.0
1727   */
1728  inline int32_t hashCode(void) const;
1729
1730  /**
1731   * Determine if this object is bogus, i.e. does not contain a valid string.
1732   * A bogus string has no value. It is different from an empty string,
1733   * although in both cases isEmpty() returns TRUE and length() returns 0.
1734   * setToBogus() and isBogus() can be used to indicate that no string value is available.
1735   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1736   * length() returns 0.
1737   *
1738   * @return TRUE if the string is bogus (invalid), FALSE if it holds a valid string
1739   * @see setToBogus()
1740   * @stable ICU 2.0
1741   */
1742  inline UBool isBogus(void) const;
1743
1744
1745  //========================================
1746  // Write operations
1747  //========================================
1748
1749  /* Assignment operations */
1750
1751  /**
1752   * Assignment operator.  Replace the characters in this UnicodeString
1753   * with the characters from <TT>srcText</TT>.
1754   * @param srcText The text containing the characters to replace
1755   * @return a reference to this
1756   * @stable ICU 2.0
1757   */
1758  UnicodeString &operator=(const UnicodeString &srcText);
1759
1760  /**
1761   * Almost the same as the assignment operator.
1762   * Replace the characters in this UnicodeString
1763   * with the characters from <code>srcText</code>.
1764   *
1765   * This function works the same for all strings except for ones that
1766   * are readonly aliases.
1767   * Starting with ICU 2.4, the assignment operator and the copy constructor
1768   * allocate a new buffer and copy the buffer contents even for readonly aliases.
1769   * This function implements the old, more efficient but less safe behavior
1770   * of making this string also a readonly alias to the same buffer.
1771   * The fastCopyFrom function must be used only if it is known that the lifetime of
1772   * this UnicodeString is at least as long as the lifetime of the aliased buffer
1773   * including its contents, for example for strings from resource bundles
1774   * or aliases to string contents.
1775   *
1776   * @param src The text containing the characters to replace.
1777   * @return a reference to this
1778   * @stable ICU 2.4
1779   */
1780  UnicodeString &fastCopyFrom(const UnicodeString &src);
1781
1782  /**
1783   * Assignment operator.  Replace the characters in this UnicodeString
1784   * with the code unit <TT>ch</TT>.
1785   * @param ch the code unit to replace
1786   * @return a reference to this
1787   * @stable ICU 2.0
1788   */
1789  inline UnicodeString& operator= (UChar ch);
1790
1791  /**
1792   * Assignment operator.  Replace the characters in this UnicodeString
1793   * with the code point <TT>ch</TT>.
1794   * @param ch the code point to replace
1795   * @return a reference to this
1796   * @stable ICU 2.0
1797   */
1798  inline UnicodeString& operator= (UChar32 ch);
1799
1800  /**
1801   * Set the text in the UnicodeString object to the characters
1802   * in <TT>srcText</TT> in the range
1803   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1804   * <TT>srcText</TT> is not modified.
1805   * @param srcText the source for the new characters
1806   * @param srcStart the offset into <TT>srcText</TT> where new characters
1807   * will be obtained
1808   * @return a reference to this
1809   * @stable ICU 2.2
1810   */
1811  inline UnicodeString& setTo(const UnicodeString& srcText,
1812               int32_t srcStart);
1813
1814  /**
1815   * Set the text in the UnicodeString object to the characters
1816   * in <TT>srcText</TT> in the range
1817   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1818   * <TT>srcText</TT> is not modified.
1819   * @param srcText the source for the new characters
1820   * @param srcStart the offset into <TT>srcText</TT> where new characters
1821   * will be obtained
1822   * @param srcLength the number of characters in <TT>srcText</TT> in the
1823   * replace string.
1824   * @return a reference to this
1825   * @stable ICU 2.0
1826   */
1827  inline UnicodeString& setTo(const UnicodeString& srcText,
1828               int32_t srcStart,
1829               int32_t srcLength);
1830
1831  /**
1832   * Set the text in the UnicodeString object to the characters in
1833   * <TT>srcText</TT>.
1834   * <TT>srcText</TT> is not modified.
1835   * @param srcText the source for the new characters
1836   * @return a reference to this
1837   * @stable ICU 2.0
1838   */
1839  inline UnicodeString& setTo(const UnicodeString& srcText);
1840
1841  /**
1842   * Set the characters in the UnicodeString object to the characters
1843   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1844   * @param srcChars the source for the new characters
1845   * @param srcLength the number of Unicode characters in srcChars.
1846   * @return a reference to this
1847   * @stable ICU 2.0
1848   */
1849  inline UnicodeString& setTo(const UChar *srcChars,
1850               int32_t srcLength);
1851
1852  /**
1853   * Set the characters in the UnicodeString object to the code unit
1854   * <TT>srcChar</TT>.
1855   * @param srcChar the code unit which becomes the UnicodeString's character
1856   * content
1857   * @return a reference to this
1858   * @stable ICU 2.0
1859   */
1860  UnicodeString& setTo(UChar srcChar);
1861
1862  /**
1863   * Set the characters in the UnicodeString object to the code point
1864   * <TT>srcChar</TT>.
1865   * @param srcChar the code point which becomes the UnicodeString's character
1866   * content
1867   * @return a reference to this
1868   * @stable ICU 2.0
1869   */
1870  UnicodeString& setTo(UChar32 srcChar);
1871
1872  /**
1873   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1874   * The text will be used for the UnicodeString object, but
1875   * it will not be released when the UnicodeString is destroyed.
1876   * This has copy-on-write semantics:
1877   * When the string is modified, then the buffer is first copied into
1878   * newly allocated memory.
1879   * The aliased buffer is never modified.
1880   * When assigned to another UnicodeString via operator= or the copy constructor, the
1881   * text is copied; via fastCopyFrom() it is aliased again (both alias the same readonly-text).
1882   *
1883   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1884   *                     This must be true if <code>textLength==-1</code>.
1885   * @param text The characters to alias for the UnicodeString.
1886   * @param textLength The number of Unicode characters in <code>text</code> to alias.
1887   *                   If -1, then this function will determine the length
1888   *                   by calling <code>u_strlen()</code>.
1889   * @return a reference to this
1890   * @stable ICU 2.0
1891   */
1892  UnicodeString &setTo(UBool isTerminated,
1893                       const UChar *text,
1894                       int32_t textLength);
1895
1896  /**
1897   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1898   * The text will be used for the UnicodeString object, but
1899   * it will not be released when the UnicodeString is destroyed.
1900   * This has write-through semantics:
1901   * For as long as the capacity of the buffer is sufficient, write operations
1902   * will directly affect the buffer. When more capacity is necessary, then
1903   * a new buffer will be allocated and the contents copied as with regularly
1904   * constructed strings.
1905   * In an assignment to another UnicodeString, the buffer will be copied.
1906   * The extract(UChar *dst) function detects whether the dst pointer is the same
1907   * as the string buffer itself and will in this case not copy the contents.
1908   *
1909   * @param buffer The characters to alias for the UnicodeString.
1910   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1911   * @param buffCapacity The size of <code>buffer</code> in UChars.
1912   * @return a reference to this
1913   * @stable ICU 2.0
1914   */
1915  UnicodeString &setTo(UChar *buffer,
1916                       int32_t buffLength,
1917                       int32_t buffCapacity);
1918
1919  /**
1920   * Make this UnicodeString object invalid.
1921   * The string will test TRUE with isBogus().
1922   *
1923   * A bogus string has no value. It is different from an empty string.
1924   * It can be used to indicate that no string value is available.
1925   * getBuffer() and getTerminatedBuffer() return NULL, and
1926   * length() returns 0.
1927   *
1928   * This utility function is used throughout the UnicodeString
1929   * implementation to indicate that a UnicodeString operation failed,
1930   * and may be used in other functions,
1931   * especially but not exclusively when such functions do not
1932   * take a UErrorCode for simplicity.
1933   *
1934   * The following methods, and no others, will clear a string object's bogus flag:
1935   * - remove()
1936   * - remove(0, INT32_MAX)
1937   * - truncate(0)
1938   * - operator=() (assignment operator)
1939   * - setTo(...)
1940   *
1941   * The simplest way to turn a bogus string into an empty one
1942   * is to use the remove() function.
1943   * Examples for other functions that are equivalent to "set to empty string":
1944   * \code
1945   * if(s.isBogus()) {
1946   *   s.remove();           // set to an empty string (remove all), or
1947   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1948   *   s.truncate(0);        // set to an empty string (complete truncation), or
1949   *   s=UnicodeString();    // assign an empty string, or
1950   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1951   *   static const UChar nul=0;
1952   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
1953   * }
1954   * \endcode
1955   *
1956   * @see isBogus()
1957   * @stable ICU 2.0
1958   */
1959  void setToBogus();
1960
1961  /**
1962   * Set the character at the specified offset to the specified character.
1963   * @param offset A valid offset into the text of the character to set
1964   * @param ch The new character
1965   * @return A reference to this
1966   * @stable ICU 2.0
1967   */
1968  UnicodeString& setCharAt(int32_t offset,
1969               UChar ch);
1970
1971
1972  /* Append operations */
1973
1974  /**
1975   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1976   * object.
1977   * @param ch the code unit to be appended
1978   * @return a reference to this
1979   * @stable ICU 2.0
1980   */
1981 inline  UnicodeString& operator+= (UChar ch);
1982
1983  /**
1984   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1985   * object.
1986   * @param ch the code point to be appended
1987   * @return a reference to this
1988   * @stable ICU 2.0
1989   */
1990 inline  UnicodeString& operator+= (UChar32 ch);
1991
1992  /**
1993   * Append operator. Append the characters in <TT>srcText</TT> to the
1994   * UnicodeString object. <TT>srcText</TT> is
1995   * not modified.
1996   * @param srcText the source for the new characters
1997   * @return a reference to this
1998   * @stable ICU 2.0
1999   */
2000  inline UnicodeString& operator+= (const UnicodeString& srcText);
2001
2002  /**
2003   * Append the characters
2004   * in <TT>srcText</TT> in the range
2005   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2006   * UnicodeString object. <TT>srcText</TT>
2007   * is not modified.
2008   * @param srcText the source for the new characters
2009   * @param srcStart the offset into <TT>srcText</TT> where new characters
2010   * will be obtained
2011   * @param srcLength the number of characters in <TT>srcText</TT> in
2012   * the append string
2013   * @return a reference to this
2014   * @stable ICU 2.0
2015   */
2016  inline UnicodeString& append(const UnicodeString& srcText,
2017            int32_t srcStart,
2018            int32_t srcLength);
2019
2020  /**
2021   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2022   * <TT>srcText</TT> is not modified.
2023   * @param srcText the source for the new characters
2024   * @return a reference to this
2025   * @stable ICU 2.0
2026   */
2027  inline UnicodeString& append(const UnicodeString& srcText);
2028
2029  /**
2030   * Append the characters in <TT>srcChars</TT> in the range
2031   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2032   * object.
2033   * <TT>srcChars</TT> is not modified.
2034   * @param srcChars the source for the new characters
2035   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2036   * will be obtained
2037   * @param srcLength the number of characters in <TT>srcChars</TT> in
2038   * the append string
2039   * @return a reference to this
2040   * @stable ICU 2.0
2041   */
2042  inline UnicodeString& append(const UChar *srcChars,
2043            int32_t srcStart,
2044            int32_t srcLength);
2045
2046  /**
2047   * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2048   * <TT>srcChars</TT> is not modified.
2049   * @param srcChars the source for the new characters
2050   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
2051   * @return a reference to this
2052   * @stable ICU 2.0
2053   */
2054  inline UnicodeString& append(const UChar *srcChars,
2055            int32_t srcLength);
2056
2057  /**
2058   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2059   * @param srcChar the code unit to append
2060   * @return a reference to this
2061   * @stable ICU 2.0
2062   */
2063  inline UnicodeString& append(UChar srcChar);
2064
2065  /**
2066   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2067   * @param srcChar the code point to append
2068   * @return a reference to this
2069   * @stable ICU 2.0
2070   */
2071  inline UnicodeString& append(UChar32 srcChar);
2072
2073
2074  /* Insert operations */
2075
2076  /**
2077   * Insert the characters in <TT>srcText</TT> in the range
2078   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2079   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2080   * @param start the offset where the insertion begins
2081   * @param srcText the source for the new characters
2082   * @param srcStart the offset into <TT>srcText</TT> where new characters
2083   * will be obtained
2084   * @param srcLength the number of characters in <TT>srcText</TT> in
2085   * the insert string
2086   * @return a reference to this
2087   * @stable ICU 2.0
2088   */
2089  inline UnicodeString& insert(int32_t start,
2090            const UnicodeString& srcText,
2091            int32_t srcStart,
2092            int32_t srcLength);
2093
2094  /**
2095   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2096   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2097   * @param start the offset where the insertion begins
2098   * @param srcText the source for the new characters
2099   * @return a reference to this
2100   * @stable ICU 2.0
2101   */
2102  inline UnicodeString& insert(int32_t start,
2103            const UnicodeString& srcText);
2104
2105  /**
2106   * Insert the characters in <TT>srcChars</TT> in the range
2107   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2108   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2109   * @param start the offset at which the insertion begins
2110   * @param srcChars the source for the new characters
2111   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2112   * will be obtained
2113   * @param srcLength the number of characters in <TT>srcChars</TT>
2114   * in the insert string
2115   * @return a reference to this
2116   * @stable ICU 2.0
2117   */
2118  inline UnicodeString& insert(int32_t start,
2119            const UChar *srcChars,
2120            int32_t srcStart,
2121            int32_t srcLength);
2122
2123  /**
2124   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2125   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2126   * @param start the offset where the insertion begins
2127   * @param srcChars the source for the new characters
2128   * @param srcLength the number of Unicode characters in srcChars.
2129   * @return a reference to this
2130   * @stable ICU 2.0
2131   */
2132  inline UnicodeString& insert(int32_t start,
2133            const UChar *srcChars,
2134            int32_t srcLength);
2135
2136  /**
2137   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2138   * offset <TT>start</TT>.
2139   * @param start the offset at which the insertion occurs
2140   * @param srcChar the code unit to insert
2141   * @return a reference to this
2142   * @stable ICU 2.0
2143   */
2144  inline UnicodeString& insert(int32_t start,
2145            UChar srcChar);
2146
2147  /**
2148   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2149   * offset <TT>start</TT>.
2150   * @param start the offset at which the insertion occurs
2151   * @param srcChar the code point to insert
2152   * @return a reference to this
2153   * @stable ICU 2.0
2154   */
2155  inline UnicodeString& insert(int32_t start,
2156            UChar32 srcChar);
2157
2158
2159  /* Replace operations */
2160
2161  /**
2162   * Replace the characters in the range
2163   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2164   * <TT>srcText</TT> in the range
2165   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2166   * <TT>srcText</TT> is not modified.
2167   * @param start the offset at which the replace operation begins
2168   * @param length the number of characters to replace. The character at
2169   * <TT>start + length</TT> is not modified.
2170   * @param srcText the source for the new characters
2171   * @param srcStart the offset into <TT>srcText</TT> where new characters
2172   * will be obtained
2173   * @param srcLength the number of characters in <TT>srcText</TT> in
2174   * the replace string
2175   * @return a reference to this
2176   * @stable ICU 2.0
2177   */
2178  UnicodeString& replace(int32_t start,
2179             int32_t length,
2180             const UnicodeString& srcText,
2181             int32_t srcStart,
2182             int32_t srcLength);
2183
2184  /**
2185   * Replace the characters in the range
2186   * [<TT>start</TT>, <TT>start + length</TT>)
2187   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2188   *  not modified.
2189   * @param start the offset at which the replace operation begins
2190   * @param length the number of characters to replace. The character at
2191   * <TT>start + length</TT> is not modified.
2192   * @param srcText the source for the new characters
2193   * @return a reference to this
2194   * @stable ICU 2.0
2195   */
2196  UnicodeString& replace(int32_t start,
2197             int32_t length,
2198             const UnicodeString& srcText);
2199
2200  /**
2201   * Replace the characters in the range
2202   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2203   * <TT>srcChars</TT> in the range
2204   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2205   * is not modified.
2206   * @param start the offset at which the replace operation begins
2207   * @param length the number of characters to replace.  The character at
2208   * <TT>start + length</TT> is not modified.
2209   * @param srcChars the source for the new characters
2210   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2211   * will be obtained
2212   * @param srcLength the number of characters in <TT>srcChars</TT>
2213   * in the replace string
2214   * @return a reference to this
2215   * @stable ICU 2.0
2216   */
2217  UnicodeString& replace(int32_t start,
2218             int32_t length,
2219             const UChar *srcChars,
2220             int32_t srcStart,
2221             int32_t srcLength);
2222
2223  /**
2224   * Replace the characters in the range
2225   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2226   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2227   * @param start the offset at which the replace operation begins
2228   * @param length number of characters to replace.  The character at
2229   * <TT>start + length</TT> is not modified.
2230   * @param srcChars the source for the new characters
2231   * @param srcLength the number of Unicode characters in srcChars
2232   * @return a reference to this
2233   * @stable ICU 2.0
2234   */
2235  inline UnicodeString& replace(int32_t start,
2236             int32_t length,
2237             const UChar *srcChars,
2238             int32_t srcLength);
2239
2240  /**
2241   * Replace the characters in the range
2242   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2243   * <TT>srcChar</TT>.
2244   * @param start the offset at which the replace operation begins
2245   * @param length the number of characters to replace.  The character at
2246   * <TT>start + length</TT> is not modified.
2247   * @param srcChar the new code unit
2248   * @return a reference to this
2249   * @stable ICU 2.0
2250   */
2251  inline UnicodeString& replace(int32_t start,
2252             int32_t length,
2253             UChar srcChar);
2254
2255  /**
2256   * Replace the characters in the range
2257   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2258   * <TT>srcChar</TT>.
2259   * @param start the offset at which the replace operation begins
2260   * @param length the number of characters to replace.  The character at
2261   * <TT>start + length</TT> is not modified.
2262   * @param srcChar the new code point
2263   * @return a reference to this
2264   * @stable ICU 2.0
2265   */
2266  inline UnicodeString& replace(int32_t start,
2267             int32_t length,
2268             UChar32 srcChar);
2269
2270  /**
2271   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2272   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2273   * @param start the offset at which the replace operation begins
2274   * @param limit the offset immediately following the replace range
2275   * @param srcText the source for the new characters
2276   * @return a reference to this
2277   * @stable ICU 2.0
2278   */
2279  inline UnicodeString& replaceBetween(int32_t start,
2280                int32_t limit,
2281                const UnicodeString& srcText);
2282
2283  /**
2284   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2285   * with the characters in <TT>srcText</TT> in the range
2286   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2287   * @param start the offset at which the replace operation begins
2288   * @param limit the offset immediately following the replace range
2289   * @param srcText the source for the new characters
2290   * @param srcStart the offset into <TT>srcText</TT> where new characters
2291   * will be obtained
2292   * @param srcLimit the offset immediately following the range to copy
2293   * in <TT>srcText</TT>
2294   * @return a reference to this
2295   * @stable ICU 2.0
2296   */
2297  inline UnicodeString& replaceBetween(int32_t start,
2298                int32_t limit,
2299                const UnicodeString& srcText,
2300                int32_t srcStart,
2301                int32_t srcLimit);
2302
2303  /**
2304   * Replace a substring of this object with the given text.
2305   * @param start the beginning index, inclusive; <code>0 <= start
2306   * <= limit</code>.
2307   * @param limit the ending index, exclusive; <code>start <= limit
2308   * <= length()</code>.
2309   * @param text the text to replace characters <code>start</code>
2310   * to <code>limit - 1</code>
2311   * @stable ICU 2.0
2312   */
2313  virtual void handleReplaceBetween(int32_t start,
2314                                    int32_t limit,
2315                                    const UnicodeString& text);
2316
2317  /**
2318   * Replaceable API
2319   * @return TRUE if it has MetaData
2320   * @stable ICU 2.4
2321   */
2322  virtual UBool hasMetaData() const;
2323
2324  /**
2325   * Copy a substring of this object, retaining attribute (out-of-band)
2326   * information.  This method is used to duplicate or reorder substrings.
2327   * The destination index must not overlap the source range.
2328   *
2329   * @param start the beginning index, inclusive; <code>0 <= start <=
2330   * limit</code>.
2331   * @param limit the ending index, exclusive; <code>start <= limit <=
2332   * length()</code>.
2333   * @param dest the destination index.  The characters from
2334   * <code>start..limit-1</code> will be copied to <code>dest</code>.
2335   * Implementations of this method may assume that <code>dest <= start ||
2336   * dest >= limit</code>.
2337   * @stable ICU 2.0
2338   */
2339  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2340
2341  /* Search and replace operations */
2342
2343  /**
2344   * Replace all occurrences of characters in oldText with the characters
2345   * in newText
2346   * @param oldText the text containing the search text
2347   * @param newText the text containing the replacement text
2348   * @return a reference to this
2349   * @stable ICU 2.0
2350   */
2351  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2352                const UnicodeString& newText);
2353
2354  /**
2355   * Replace all occurrences of characters in oldText with characters
2356   * in newText
2357   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2358   * @param start the start of the range in which replace will be performed
2359   * @param length the length of the range in which replace will be performed
2360   * @param oldText the text containing the search text
2361   * @param newText the text containing the replacement text
2362   * @return a reference to this
2363   * @stable ICU 2.0
2364   */
2365  inline UnicodeString& findAndReplace(int32_t start,
2366                int32_t length,
2367                const UnicodeString& oldText,
2368                const UnicodeString& newText);
2369
2370  /**
2371   * Replace all occurrences of characters in oldText in the range
2372   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2373   * in newText in the range
2374   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2375   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2376   * @param start the start of the range in which replace will be performed
2377   * @param length the length of the range in which replace will be performed
2378   * @param oldText the text containing the search text
2379   * @param oldStart the start of the search range in <TT>oldText</TT>
2380   * @param oldLength the length of the search range in <TT>oldText</TT>
2381   * @param newText the text containing the replacement text
2382   * @param newStart the start of the replacement range in <TT>newText</TT>
2383   * @param newLength the length of the replacement range in <TT>newText</TT>
2384   * @return a reference to this
2385   * @stable ICU 2.0
2386   */
2387  UnicodeString& findAndReplace(int32_t start,
2388                int32_t length,
2389                const UnicodeString& oldText,
2390                int32_t oldStart,
2391                int32_t oldLength,
2392                const UnicodeString& newText,
2393                int32_t newStart,
2394                int32_t newLength);
2395
2396
2397  /* Remove operations */
2398
2399  /**
2400   * Remove all characters from the UnicodeString object.
2401   * @return a reference to this
2402   * @stable ICU 2.0
2403   */
2404  inline UnicodeString& remove(void);
2405
2406  /**
2407   * Remove the characters in the range
2408   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2409   * @param start the offset of the first character to remove
2410   * @param length the number of characters to remove; defaults to INT32_MAX
2411   * @return a reference to this
2412   * @stable ICU 2.0
2413   */
2414  inline UnicodeString& remove(int32_t start,
2415                               int32_t length = (int32_t)INT32_MAX);
2416
2417  /**
2418   * Remove the characters in the range
2419   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2420   * @param start the offset of the first character to remove
2421   * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2422   * @return a reference to this
2423   * @stable ICU 2.0
2424   */
2425  inline UnicodeString& removeBetween(int32_t start,
2426                                      int32_t limit = (int32_t)INT32_MAX);
2427
2428  /**
2429   * Retain only the characters in the range
2430   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2431   * Removes characters before <code>start</code> and at and after <code>limit</code>.
2432   * @param start the offset of the first character to retain
2433   * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
2434   * @return a reference to this
2435   * @stable ICU 4.4
2436   */
2437  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2438
2439  /* Length operations */
2440
2441  /**
2442   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2443   * If the length of this UnicodeString is less than targetLength,
2444   * targetLength - length() copies of padChar will be added to the
2445   * beginning of this UnicodeString.
2446   * @param targetLength the desired length of the string
2447   * @param padChar the character to use for padding. Defaults to
2448   * space (U+0020)
2449   * @return TRUE if the text was padded, FALSE otherwise.
2450   * @stable ICU 2.0
2451   */
2452  UBool padLeading(int32_t targetLength,
2453                    UChar padChar = 0x0020);
2454
2455  /**
2456   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2457   * If the length of this UnicodeString is less than targetLength,
2458   * targetLength - length() copies of padChar will be added to the
2459   * end of this UnicodeString.
2460   * @param targetLength the desired length of the string
2461   * @param padChar the character to use for padding. Defaults to
2462   * space (U+0020)
2463   * @return TRUE if the text was padded, FALSE otherwise.
2464   * @stable ICU 2.0
2465   */
2466  UBool padTrailing(int32_t targetLength,
2467                     UChar padChar = 0x0020);
2468
2469  /**
2470   * Truncate this UnicodeString to the <TT>targetLength</TT>.
2471   * @param targetLength the desired length of this UnicodeString.
2472   * @return TRUE if the text was truncated, FALSE otherwise
2473   * @stable ICU 2.0
2474   */
2475  inline UBool truncate(int32_t targetLength);
2476
2477  /**
2478   * Trims leading and trailing whitespace from this UnicodeString.
2479   * @return a reference to this
2480   * @stable ICU 2.0
2481   */
2482  UnicodeString& trim(void);
2483
2484
2485  /* Miscellaneous operations */
2486
2487  /**
2488   * Reverse this UnicodeString in place.
2489   * @return a reference to this
2490   * @stable ICU 2.0
2491   */
2492  inline UnicodeString& reverse(void);
2493
2494  /**
2495   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2496   * this UnicodeString.
2497   * @param start the start of the range to reverse
2498   * @param length the number of characters to reverse
2499   * @return a reference to this
2500   * @stable ICU 2.0
2501   */
2502  inline UnicodeString& reverse(int32_t start,
2503             int32_t length);
2504
2505  /**
2506   * Convert the characters in this to UPPER CASE following the conventions of
2507   * the default locale.
2508   * @return A reference to this.
2509   * @stable ICU 2.0
2510   */
2511  UnicodeString& toUpper(void);
2512
2513  /**
2514   * Convert the characters in this to UPPER CASE following the conventions of
2515   * a specific locale.
2516   * @param locale The locale containing the conventions to use.
2517   * @return A reference to this.
2518   * @stable ICU 2.0
2519   */
2520  UnicodeString& toUpper(const Locale& locale);
2521
2522  /**
2523   * Convert the characters in this to lower case following the conventions of
2524   * the default locale.
2525   * @return A reference to this.
2526   * @stable ICU 2.0
2527   */
2528  UnicodeString& toLower(void);
2529
2530  /**
2531   * Convert the characters in this to lower case following the conventions of
2532   * a specific locale.
2533   * @param locale The locale containing the conventions to use.
2534   * @return A reference to this.
2535   * @stable ICU 2.0
2536   */
2537  UnicodeString& toLower(const Locale& locale);
2538
2539#if !UCONFIG_NO_BREAK_ITERATION
2540
2541  /**
2542   * Titlecase this string, convenience function using the default locale.
2543   *
2544   * Casing is locale-dependent and context-sensitive.
2545   * Titlecasing uses a break iterator to find the first characters of words
2546   * that are to be titlecased. It titlecases those characters and lowercases
2547   * all others.
2548   *
2549   * The titlecase break iterator can be provided to customize for arbitrary
2550   * styles, using rules and dictionaries beyond the standard iterators.
2551   * It may be more efficient to always provide an iterator to avoid
2552   * opening and closing one for each string.
2553   * The standard titlecase iterator for the root locale implements the
2554   * algorithm of Unicode TR 21.
2555   *
2556   * This function uses only the setText(), first() and next() methods of the
2557   * provided break iterator.
2558   *
2559   * @param titleIter A break iterator to find the first characters of words
2560   *                  that are to be titlecased.
2561   *                  If none is provided (0), then a standard titlecase
2562   *                  break iterator is opened.
2563   *                  Otherwise the provided iterator is set to the string's text.
2564   * @return A reference to this.
2565   * @stable ICU 2.1
2566   */
2567  UnicodeString &toTitle(BreakIterator *titleIter);
2568
2569  /**
2570   * Titlecase this string.
2571   *
2572   * Casing is locale-dependent and context-sensitive.
2573   * Titlecasing uses a break iterator to find the first characters of words
2574   * that are to be titlecased. It titlecases those characters and lowercases
2575   * all others.
2576   *
2577   * The titlecase break iterator can be provided to customize for arbitrary
2578   * styles, using rules and dictionaries beyond the standard iterators.
2579   * It may be more efficient to always provide an iterator to avoid
2580   * opening and closing one for each string.
2581   * The standard titlecase iterator for the root locale implements the
2582   * algorithm of Unicode TR 21.
2583   *
2584   * This function uses only the setText(), first() and next() methods of the
2585   * provided break iterator.
2586   *
2587   * @param titleIter A break iterator to find the first characters of words
2588   *                  that are to be titlecased.
2589   *                  If none is provided (0), then a standard titlecase
2590   *                  break iterator is opened.
2591   *                  Otherwise the provided iterator is set to the string's text.
2592   * @param locale    The locale to consider.
2593   * @return A reference to this.
2594   * @stable ICU 2.1
2595   */
2596  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2597
2598  /**
2599   * Titlecase this string, with options.
2600   *
2601   * Casing is locale-dependent and context-sensitive.
2602   * Titlecasing uses a break iterator to find the first characters of words
2603   * that are to be titlecased. It titlecases those characters and lowercases
2604   * all others. (This can be modified with options.)
2605   *
2606   * The titlecase break iterator can be provided to customize for arbitrary
2607   * styles, using rules and dictionaries beyond the standard iterators.
2608   * It may be more efficient to always provide an iterator to avoid
2609   * opening and closing one for each string.
2610   * The standard titlecase iterator for the root locale implements the
2611   * algorithm of Unicode TR 21.
2612   *
2613   * This function uses only the setText(), first() and next() methods of the
2614   * provided break iterator.
2615   *
2616   * @param titleIter A break iterator to find the first characters of words
2617   *                  that are to be titlecased.
2618   *                  If none is provided (0), then a standard titlecase
2619   *                  break iterator is opened.
2620   *                  Otherwise the provided iterator is set to the string's text.
2621   * @param locale    The locale to consider.
2622   * @param options Options bit set, see ucasemap_open().
2623   * @return A reference to this.
2624   * @see U_TITLECASE_NO_LOWERCASE
2625   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2626   * @see ucasemap_open
2627   * @stable ICU 3.8
2628   */
2629  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2630
2631#endif
2632
2633  /**
2634   * Case-fold the characters in this string.
2635   * Case-folding is locale-independent and not context-sensitive,
2636   * but there is an option for whether to include or exclude mappings for dotted I
2637   * and dotless i that are marked with 'I' in CaseFolding.txt.
2638   * The result may be longer or shorter than the original.
2639   *
2640   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2641   * @return A reference to this.
2642   * @stable ICU 2.0
2643   */
2644  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2645
2646  //========================================
2647  // Access to the internal buffer
2648  //========================================
2649
2650  /**
2651   * Get a read/write pointer to the internal buffer.
2652   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2653   * writable, and is still owned by the UnicodeString object.
2654   * Calls to getBuffer(minCapacity) must not be nested, and
2655   * must be matched with calls to releaseBuffer(newLength).
2656   * If the string buffer was read-only or shared,
2657   * then it will be reallocated and copied.
2658   *
2659   * An attempted nested call will return 0, and will not further modify the
2660   * state of the UnicodeString object.
2661   * It also returns 0 if the string is bogus.
2662   *
2663   * The actual capacity of the string buffer may be larger than minCapacity.
2664   * getCapacity() returns the actual capacity.
2665   * For many operations, the full capacity should be used to avoid reallocations.
2666   *
2667   * While the buffer is "open" between getBuffer(minCapacity)
2668   * and releaseBuffer(newLength), the following applies:
2669   * - The string length is set to 0.
2670   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2671   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2672   * - You can read from and write to the returned buffer.
2673   * - The previous string contents will still be in the buffer;
2674   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
2675   *   If the length() was greater than minCapacity, then any contents after minCapacity
2676   *   may be lost.
2677   *   The buffer contents are not NUL-terminated by getBuffer().
2678   *   If length()<getCapacity() then you can terminate it by writing a NUL
2679   *   at index length().
2680   * - You must call releaseBuffer(newLength) in order to
2681   *   return to normal UnicodeString operation.
2682   *
2683   * @param minCapacity the minimum number of UChars that are to be available
2684   *        in the buffer, starting at the returned pointer;
2685   *        default to the current string capacity if minCapacity==-1
2686   * @return a writable pointer to the internal string buffer,
2687   *         or 0 if an error occurs (nested calls, out of memory, bogus string)
2688   *
2689   * @see releaseBuffer
2690   * @see getTerminatedBuffer()
2691   * @stable ICU 2.0
2692   */
2693  UChar *getBuffer(int32_t minCapacity);
2694
2695  /**
2696   * Release a read/write buffer on a UnicodeString object with an
2697   * "open" getBuffer(minCapacity).
2698   * This function must be called in a matched pair with getBuffer(minCapacity).
2699   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2700   *
2701   * It will set the string length to newLength, at most to the current capacity.
2702   * If newLength==-1 then it will set the length according to the
2703   * first NUL in the buffer, or to the capacity if there is no NUL.
2704   *
2705   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2706   *
2707   * @param newLength the new length of the UnicodeString object;
2708   *        defaults to the current capacity if newLength is greater than that;
2709   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2710   *        the current capacity of the string
2711   *
2712   * @see getBuffer(int32_t minCapacity)
2713   * @stable ICU 2.0
2714   */
2715  void releaseBuffer(int32_t newLength=-1);
2716
2717  /**
2718   * Get a read-only pointer to the internal buffer.
2719   * This can be called at any time on a valid UnicodeString.
2720   *
2721   * It returns 0 if the string is bogus, or
2722   * during an "open" getBuffer(minCapacity).
2723   *
2724   * It can be called as many times as desired.
2725   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2726   * at which time the pointer is semantically invalidated and must not be used any more.
2727   *
2728   * The capacity of the buffer can be determined with getCapacity().
2729   * The part after length() may or may not be initialized and valid,
2730   * depending on the history of the UnicodeString object.
2731   *
2732   * The buffer contents are (probably) not NUL-terminated.
2733   * You can check if it is with
2734   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2735   * (See getTerminatedBuffer().)
2736   *
2737   * The buffer may reside in read-only memory. Its contents must not
2738   * be modified.
2739   *
2740   * @return a read-only pointer to the internal string buffer,
2741   *         or 0 if the string is bogus or during an "open" getBuffer(minCapacity)
2742   *
2743   * @see getBuffer(int32_t minCapacity)
2744   * @see getTerminatedBuffer()
2745   * @stable ICU 2.0
2746   */
2747  inline const UChar *getBuffer() const;
2748
2749  /**
2750   * Get a read-only pointer to the internal buffer,
2751   * making sure that it is NUL-terminated.
2752   * This can be called at any time on a valid UnicodeString.
2753   *
2754   * It returns 0 if the string is bogus, or
2755   * during an "open" getBuffer(minCapacity), or if the buffer cannot
2756   * be NUL-terminated (because memory allocation failed).
2757   *
2758   * It can be called as many times as desired.
2759   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2760   * at which time the pointer is semantically invalidated and must not be used any more.
2761   *
2762   * The capacity of the buffer can be determined with getCapacity().
2763   * The part after length()+1 may or may not be initialized and valid,
2764   * depending on the history of the UnicodeString object.
2765   *
2766   * The buffer contents are guaranteed to be NUL-terminated.
2767   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2768   * is written.
2769   * For this reason, this function is not const, unlike getBuffer().
2770   * Note that a UnicodeString may also contain NUL characters as part of its contents.
2771   *
2772   * The buffer may reside in read-only memory. Its contents must not
2773   * be modified.
2774   *
2775   * @return a read-only pointer to the NUL-terminated internal string buffer,
2776   *         or 0 on failure (bogus string, "open" getBuffer(minCapacity), allocation failure)
2777   *
2778   * @see getBuffer(int32_t minCapacity)
2779   * @see getBuffer()
2780   * @stable ICU 2.2
2781   */
2782  inline const UChar *getTerminatedBuffer();
2783
2784  //========================================
2785  // Constructors
2786  //========================================
2787
2788  /** Construct an empty UnicodeString.
2789   * @stable ICU 2.0
2790   */
2791  UnicodeString();
2792
2793  /**
2794   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2795   * @param capacity the number of UChars this UnicodeString should hold
2796   * before a resize is necessary; if count is greater than 0 and count
2797   * code points c take up more space than capacity, then capacity is adjusted
2798   * accordingly.
2799   * @param c is used to initially fill the string
2800   * @param count specifies how many code points c are to be written in the
2801   *              string
2802   * @stable ICU 2.0
2803   */
2804  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2805
2806  /**
2807   * Single UChar (code unit) constructor.
2808   * @param ch the character to place in the UnicodeString
2809   * @stable ICU 2.0
2810   */
2811  UnicodeString(UChar ch);
2812
2813  /**
2814   * Single UChar32 (code point) constructor.
2815   * @param ch the character to place in the UnicodeString
2816   * @stable ICU 2.0
2817   */
2818  UnicodeString(UChar32 ch);
2819
2820  /**
2821   * UChar* constructor.
2822   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2823   * must be NUL (U+0000) terminated.
2824   * @stable ICU 2.0
2825   */
2826  UnicodeString(const UChar *text);
2827
2828  /**
2829   * UChar* constructor.
2830   * @param text The characters to place in the UnicodeString.
2831   * @param textLength The number of Unicode characters in <TT>text</TT>
2832   * to copy.
2833   * @stable ICU 2.0
2834   */
2835  UnicodeString(const UChar *text,
2836        int32_t textLength);
2837
2838  /**
2839   * Readonly-aliasing UChar* constructor.
2840   * The text will be used for the UnicodeString object, but
2841   * it will not be released when the UnicodeString is destroyed.
2842   * This has copy-on-write semantics:
2843   * When the string is modified, then the buffer is first copied into
2844   * newly allocated memory.
2845   * The aliased buffer is never modified.
2846   * In an assignment to another UnicodeString, the text will be aliased again,
2847   * so that both strings then alias the same readonly-text.
2848   *
2849   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2850   *                     This must be true if <code>textLength==-1</code>.
2851   * @param text The characters to alias for the UnicodeString.
2852   * @param textLength The number of Unicode characters in <code>text</code> to alias.
2853   *                   If -1, then this constructor will determine the length
2854   *                   by calling <code>u_strlen()</code>.
2855   * @stable ICU 2.0
2856   */
2857  UnicodeString(UBool isTerminated,
2858                const UChar *text,
2859                int32_t textLength);
2860
2861  /**
2862   * Writable-aliasing UChar* constructor.
2863   * The text will be used for the UnicodeString object, but
2864   * it will not be released when the UnicodeString is destroyed.
2865   * This has write-through semantics:
2866   * For as long as the capacity of the buffer is sufficient, write operations
2867   * will directly affect the buffer. When more capacity is necessary, then
2868   * a new buffer will be allocated and the contents copied as with regularly
2869   * constructed strings.
2870   * In an assignment to another UnicodeString, the buffer will be copied.
2871   * The extract(UChar *dst) function detects whether the dst pointer is the same
2872   * as the string buffer itself and will in this case not copy the contents.
2873   *
2874   * @param buffer The characters to alias for the UnicodeString.
2875   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2876   * @param buffCapacity The size of <code>buffer</code> in UChars.
2877   * @stable ICU 2.0
2878   */
2879  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2880
2881#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2882
2883  /**
2884   * char* constructor.
2885   * @param codepageData an array of bytes, null-terminated,
2886   *                     in the platform's default codepage.
2887   * @stable ICU 2.0
2888   */
2889  UnicodeString(const char *codepageData);
2890
2891  /**
2892   * char* constructor.
2893   * @param codepageData an array of bytes in the platform's default codepage.
2894   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2895   * @stable ICU 2.0
2896   */
2897  UnicodeString(const char *codepageData, int32_t dataLength);
2898
2899#endif
2900
2901#if !UCONFIG_NO_CONVERSION
2902
2903  /**
2904   * char* constructor.
2905   * @param codepageData an array of bytes, null-terminated
2906   * @param codepage the encoding of <TT>codepageData</TT>.  The special
2907   * value 0 for <TT>codepage</TT> indicates that the text is in the
2908   * platform's default codepage.
2909   *
2910   * If <code>codepage</code> is an empty string (<code>""</code>),
2911   * then a simple conversion is performed on the codepage-invariant
2912   * subset ("invariant characters") of the platform encoding. See utypes.h.
2913   * Recommendation: For invariant-character strings use the constructor
2914   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2915   * because it avoids object code dependencies of UnicodeString on
2916   * the conversion code.
2917   *
2918   * @stable ICU 2.0
2919   */
2920  UnicodeString(const char *codepageData, const char *codepage);
2921
2922  /**
2923   * char* constructor.
2924   * @param codepageData an array of bytes.
2925   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2926   * @param codepage the encoding of <TT>codepageData</TT>.  The special
2927   * value 0 for <TT>codepage</TT> indicates that the text is in the
2928   * platform's default codepage.
2929   * If <code>codepage</code> is an empty string (<code>""</code>),
2930   * then a simple conversion is performed on the codepage-invariant
2931   * subset ("invariant characters") of the platform encoding. See utypes.h.
2932   * Recommendation: For invariant-character strings use the constructor
2933   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2934   * because it avoids object code dependencies of UnicodeString on
2935   * the conversion code.
2936   *
2937   * @stable ICU 2.0
2938   */
2939  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2940
2941  /**
2942   * char * / UConverter constructor.
2943   * This constructor uses an existing UConverter object to
2944   * convert the codepage string to Unicode and construct a UnicodeString
2945   * from that.
2946   *
2947   * The converter is reset at first.
2948   * If the error code indicates a failure before this constructor is called,
2949   * or if an error occurs during conversion or construction,
2950   * then the string will be bogus.
2951   *
2952   * This function avoids the overhead of opening and closing a converter if
2953   * multiple strings are constructed.
2954   *
2955   * @param src input codepage string
2956   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2957   * @param cnv converter object (ucnv_resetToUnicode() will be called),
2958   *        can be NULL for the default converter
2959   * @param errorCode normal ICU error code
2960   * @stable ICU 2.0
2961   */
2962  UnicodeString(
2963        const char *src, int32_t srcLength,
2964        UConverter *cnv,
2965        UErrorCode &errorCode);
2966
2967#endif
2968
2969  /**
2970   * Constructs a Unicode string from an invariant-character char * string.
2971   * About invariant characters see utypes.h.
2972   * This constructor has no runtime dependency on conversion code and is
2973   * therefore recommended over ones taking a charset name string
2974   * (where the empty string "" indicates invariant-character conversion).
2975   *
2976   * Use the macro US_INV as the third, signature-distinguishing parameter.
2977   *
2978   * For example:
2979   * \code
2980   * void fn(const char *s) {
2981   *   UnicodeString ustr(s, -1, US_INV);
2982   *   // use ustr ...
2983   * }
2984   * \endcode
2985   *
2986   * @param src String using only invariant characters.
2987   * @param length Length of src, or -1 if NUL-terminated.
2988   * @param inv Signature-distinguishing parameter, use US_INV.
2989   *
2990   * @see US_INV
2991   * @stable ICU 3.2
2992   */
2993  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2994
2995
2996  /**
2997   * Copy constructor.
2998   * @param that The UnicodeString object to copy.
2999   * @stable ICU 2.0
3000   */
3001  UnicodeString(const UnicodeString& that);
3002
3003  /**
3004   * 'Substring' constructor from tail of source string.
3005   * @param src The UnicodeString object to copy.
3006   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3007   * @stable ICU 2.2
3008   */
3009  UnicodeString(const UnicodeString& src, int32_t srcStart);
3010
3011  /**
3012   * 'Substring' constructor from subrange of source string.
3013   * @param src The UnicodeString object to copy.
3014   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3015   * @param srcLength The number of characters from <tt>src</tt> to copy.
3016   * @stable ICU 2.2
3017   */
3018  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3019
3020  /**
3021   * Clone this object, an instance of a subclass of Replaceable.
3022   * Clones can be used concurrently in multiple threads.
3023   * If a subclass does not implement clone(), or if an error occurs,
3024   * then NULL is returned.
3025   * The clone functions in all subclasses return a pointer to a Replaceable
3026   * because some compilers do not support covariant (same-as-this)
3027   * return types; cast to the appropriate subclass if necessary.
3028   * The caller must delete the clone.
3029   *
3030   * @return a clone of this object
3031   *
3032   * @see Replaceable::clone
3033   * @see getDynamicClassID
3034   * @stable ICU 2.6
3035   */
3036  virtual Replaceable *clone() const;
3037
3038  /** Destructor.
3039   * @stable ICU 2.0
3040   */
3041  virtual ~UnicodeString();
3042
3043  /**
3044   * Create a UnicodeString from a UTF-8 string.
3045   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3046   * Calls u_strFromUTF8WithSub().
3047   *
3048   * @param utf8 UTF-8 input string.
3049   *             Note that a StringPiece can be implicitly constructed
3050   *             from a std::string or a NUL-terminated const char * string.
3051   * @return A UnicodeString with equivalent UTF-16 contents.
3052   * @see toUTF8
3053   * @see toUTF8String
3054   * @stable ICU 4.2
3055   */
3056  static UnicodeString fromUTF8(const StringPiece &utf8);
3057
3058  /**
3059   * Create a UnicodeString from a UTF-32 string.
3060   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3061   * Calls u_strFromUTF32WithSub().
3062   *
3063   * @param utf32 UTF-32 input string. Must not be NULL.
3064   * @param length Length of the input string, or -1 if NUL-terminated.
3065   * @return A UnicodeString with equivalent UTF-16 contents.
3066   * @see toUTF32
3067   * @stable ICU 4.2
3068   */
3069  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3070
3071  /* Miscellaneous operations */
3072
3073  /**
3074   * Unescape a string of characters and return a string containing
3075   * the result.  The following escape sequences are recognized:
3076   *
3077   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3078   * \\Uhhhhhhhh   8 hex digits
3079   * \\xhh         1-2 hex digits
3080   * \\ooo         1-3 octal digits; o in [0-7]
3081   * \\cX          control-X; X is masked with 0x1F
3082   *
3083   * as well as the standard ANSI C escapes:
3084   *
3085   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3086   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3087   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3088   *
3089   * Anything else following a backslash is generically escaped.  For
3090   * example, "[a\\-z]" returns "[a-z]".
3091   *
3092   * If an escape sequence is ill-formed, this method returns an empty
3093   * string.  An example of an ill-formed sequence is "\\u" followed by
3094   * fewer than 4 hex digits.
3095   *
3096   * This function is similar to u_unescape() but not identical to it.
3097   * The latter takes a source char*, so it does escape recognition
3098   * and also invariant conversion.
3099   *
3100   * @return a string with backslash escapes interpreted, or an
3101   * empty string on error.
3102   * @see UnicodeString#unescapeAt()
3103   * @see u_unescape()
3104   * @see u_unescapeAt()
3105   * @stable ICU 2.0
3106   */
3107  UnicodeString unescape() const;
3108
3109  /**
3110   * Unescape a single escape sequence and return the represented
3111   * character.  See unescape() for a listing of the recognized escape
3112   * sequences.  The character at offset-1 is assumed (without
3113   * checking) to be a backslash.  If the escape sequence is
3114   * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
3115   * returned.
3116   *
3117   * @param offset an input output parameter.  On input, it is the
3118   * offset into this string where the escape sequence is located,
3119   * after the initial backslash.  On output, it is advanced after the
3120   * last character parsed.  On error, it is not advanced at all.
3121   * @return the character represented by the escape sequence at
3122   * offset, or (UChar32)0xFFFFFFFF on error.
3123   * @see UnicodeString#unescape()
3124   * @see u_unescape()
3125   * @see u_unescapeAt()
3126   * @stable ICU 2.0
3127   */
3128  UChar32 unescapeAt(int32_t &offset) const;
3129
3130  /**
3131   * ICU "poor man's RTTI", returns a UClassID for this class.
3132   *
3133   * @stable ICU 2.2
3134   */
3135  static UClassID U_EXPORT2 getStaticClassID();
3136
3137  /**
3138   * ICU "poor man's RTTI", returns a UClassID for the actual class.
3139   *
3140   * @stable ICU 2.2
3141   */
3142  virtual UClassID getDynamicClassID() const;
3143
3144  //========================================
3145  // Implementation methods
3146  //========================================
3147
3148protected:
3149  /**
3150   * Implement Replaceable::getLength() (see jitterbug 1027).
3151   * @stable ICU 2.4
3152   */
3153  virtual int32_t getLength() const;
3154
3155  /**
3156   * The change in Replaceable to use virtual getCharAt() allows
3157   * UnicodeString::charAt() to be inline again (see jitterbug 709).
3158   * @stable ICU 2.4
3159   */
3160  virtual UChar getCharAt(int32_t offset) const;
3161
3162  /**
3163   * The change in Replaceable to use virtual getChar32At() allows
3164   * UnicodeString::char32At() to be inline again (see jitterbug 709).
3165   * @stable ICU 2.4
3166   */
3167  virtual UChar32 getChar32At(int32_t offset) const;
3168
3169private:
3170  // For char* constructors. Could be made public.
3171  UnicodeString &setToUTF8(const StringPiece &utf8);
3172  // For extract(char*).
3173  // We could make a toUTF8(target, capacity, errorCode) public but not
3174  // this version: New API will be cleaner if we make callers create substrings
3175  // rather than having start+length on every method,
3176  // and it should take a UErrorCode&.
3177  int32_t
3178  toUTF8(int32_t start, int32_t len,
3179         char *target, int32_t capacity) const;
3180
3181
3182  inline int8_t
3183  doCompare(int32_t start,
3184           int32_t length,
3185           const UnicodeString& srcText,
3186           int32_t srcStart,
3187           int32_t srcLength) const;
3188
3189  int8_t doCompare(int32_t start,
3190           int32_t length,
3191           const UChar *srcChars,
3192           int32_t srcStart,
3193           int32_t srcLength) const;
3194
3195  inline int8_t
3196  doCompareCodePointOrder(int32_t start,
3197                          int32_t length,
3198                          const UnicodeString& srcText,
3199                          int32_t srcStart,
3200                          int32_t srcLength) const;
3201
3202  int8_t doCompareCodePointOrder(int32_t start,
3203                                 int32_t length,
3204                                 const UChar *srcChars,
3205                                 int32_t srcStart,
3206                                 int32_t srcLength) const;
3207
3208  inline int8_t
3209  doCaseCompare(int32_t start,
3210                int32_t length,
3211                const UnicodeString &srcText,
3212                int32_t srcStart,
3213                int32_t srcLength,
3214                uint32_t options) const;
3215
3216  int8_t
3217  doCaseCompare(int32_t start,
3218                int32_t length,
3219                const UChar *srcChars,
3220                int32_t srcStart,
3221                int32_t srcLength,
3222                uint32_t options) const;
3223
3224  int32_t doIndexOf(UChar c,
3225            int32_t start,
3226            int32_t length) const;
3227
3228  int32_t doIndexOf(UChar32 c,
3229                        int32_t start,
3230                        int32_t length) const;
3231
3232  int32_t doLastIndexOf(UChar c,
3233                int32_t start,
3234                int32_t length) const;
3235
3236  int32_t doLastIndexOf(UChar32 c,
3237                            int32_t start,
3238                            int32_t length) const;
3239
3240  void doExtract(int32_t start,
3241         int32_t length,
3242         UChar *dst,
3243         int32_t dstStart) const;
3244
3245  inline void doExtract(int32_t start,
3246         int32_t length,
3247         UnicodeString& target) const;
3248
3249  inline UChar doCharAt(int32_t offset)  const;
3250
3251  UnicodeString& doReplace(int32_t start,
3252               int32_t length,
3253               const UnicodeString& srcText,
3254               int32_t srcStart,
3255               int32_t srcLength);
3256
3257  UnicodeString& doReplace(int32_t start,
3258               int32_t length,
3259               const UChar *srcChars,
3260               int32_t srcStart,
3261               int32_t srcLength);
3262
3263  UnicodeString& doReverse(int32_t start,
3264               int32_t length);
3265
3266  // calculate hash code
3267  int32_t doHashCode(void) const;
3268
3269  // get pointer to start of array
3270  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3271  inline UChar* getArrayStart(void);
3272  inline const UChar* getArrayStart(void) const;
3273
3274  // A UnicodeString object (not necessarily its current buffer)
3275  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3276  inline UBool isWritable() const;
3277
3278  // Is the current buffer writable?
3279  inline UBool isBufferWritable() const;
3280
3281  // None of the following does releaseArray().
3282  inline void setLength(int32_t len);        // sets only fShortLength and fLength
3283  inline void setToEmpty();                  // sets fFlags=kShortString
3284  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3285
3286  // allocate the array; result may be fStackBuffer
3287  // sets refCount to 1 if appropriate
3288  // sets fArray, fCapacity, and fFlags
3289  // returns boolean for success or failure
3290  UBool allocate(int32_t capacity);
3291
3292  // release the array if owned
3293  void releaseArray(void);
3294
3295  // turn a bogus string into an empty one
3296  void unBogus();
3297
3298  // implements assignment operator, copy constructor, and fastCopyFrom()
3299  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3300
3301  // Pin start and length to acceptable values.
3302  inline void pinIndex(int32_t& start) const;
3303  inline void pinIndices(int32_t& start,
3304                         int32_t& length) const;
3305
3306#if !UCONFIG_NO_CONVERSION
3307
3308  /* Internal extract() using UConverter. */
3309  int32_t doExtract(int32_t start, int32_t length,
3310                    char *dest, int32_t destCapacity,
3311                    UConverter *cnv,
3312                    UErrorCode &errorCode) const;
3313
3314  /*
3315   * Real constructor for converting from codepage data.
3316   * It assumes that it is called with !fRefCounted.
3317   *
3318   * If <code>codepage==0</code>, then the default converter
3319   * is used for the platform encoding.
3320   * If <code>codepage</code> is an empty string (<code>""</code>),
3321   * then a simple conversion is performed on the codepage-invariant
3322   * subset ("invariant characters") of the platform encoding. See utypes.h.
3323   */
3324  void doCodepageCreate(const char *codepageData,
3325                        int32_t dataLength,
3326                        const char *codepage);
3327
3328  /*
3329   * Worker function for creating a UnicodeString from
3330   * a codepage string using a UConverter.
3331   */
3332  void
3333  doCodepageCreate(const char *codepageData,
3334                   int32_t dataLength,
3335                   UConverter *converter,
3336                   UErrorCode &status);
3337
3338#endif
3339
3340  /*
3341   * This function is called when write access to the array
3342   * is necessary.
3343   *
3344   * We need to make a copy of the array if
3345   * the buffer is read-only, or
3346   * the buffer is refCounted (shared), and refCount>1, or
3347   * the buffer is too small.
3348   *
3349   * Return FALSE if memory could not be allocated.
3350   */
3351  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3352                            int32_t growCapacity = -1,
3353                            UBool doCopyArray = TRUE,
3354                            int32_t **pBufferToDelete = 0,
3355                            UBool forceClone = FALSE);
3356
3357  // common function for case mappings
3358  UnicodeString &
3359  caseMap(BreakIterator *titleIter,
3360          const char *locale,
3361          uint32_t options,
3362          int32_t toWhichCase);
3363
3364  // ref counting
3365  void addRef(void);
3366  int32_t removeRef(void);
3367  int32_t refCount(void) const;
3368
3369  // constants
3370  enum {
3371    // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
3372    // 32-bit pointers: 4+1+1+13*2 = 32 bytes
3373    // 64-bit pointers: 8+1+1+15*2 = 40 bytes
3374    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
3375    kInvalidUChar=0xffff, // invalid UChar index
3376    kGrowSize=128, // grow size for this buffer
3377    kInvalidHashCode=0, // invalid hash code
3378    kEmptyHashCode=1, // hash code for empty string
3379
3380    // bit flag values for fFlags
3381    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
3382    kUsingStackBuffer=2,// fArray==fStackBuffer
3383    kRefCounted=4,      // there is a refCount field before the characters in fArray
3384    kBufferIsReadonly=8,// do not write to this buffer
3385    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
3386                        // and releaseBuffer(newLength) must be called
3387
3388    // combined values for convenience
3389    kShortString=kUsingStackBuffer,
3390    kLongString=kRefCounted,
3391    kReadonlyAlias=kBufferIsReadonly,
3392    kWritableAlias=0
3393  };
3394
3395  friend class StringThreadTest;
3396
3397  union StackBufferOrFields;        // forward declaration necessary before friend declaration
3398  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3399
3400  /*
3401   * The following are all the class fields that are stored
3402   * in each UnicodeString object.
3403   * Note that UnicodeString has virtual functions,
3404   * therefore there is an implicit vtable pointer
3405   * as the first real field.
3406   * The fields should be aligned such that no padding is
3407   * necessary, mostly by having larger types first.
3408   * On 32-bit machines, the size should be 32 bytes,
3409   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3410   */
3411  // (implicit) *vtable;
3412  int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
3413  uint8_t   fFlags;         // bit flags: see constants above
3414  union StackBufferOrFields {
3415    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3416    // else fFields is used
3417    UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
3418    struct {
3419      uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
3420      int32_t   fLength;    // number of characters in fArray if >127; else undefined
3421      UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
3422      int32_t   fCapacity;  // sizeof fArray
3423    } fFields;
3424  } fUnion;
3425};
3426
3427/**
3428 * Create a new UnicodeString with the concatenation of two others.
3429 *
3430 * @param s1 The first string to be copied to the new one.
3431 * @param s2 The second string to be copied to the new one, after s1.
3432 * @return UnicodeString(s1).append(s2)
3433 * @stable ICU 2.8
3434 */
3435U_COMMON_API UnicodeString U_EXPORT2
3436operator+ (const UnicodeString &s1, const UnicodeString &s2);
3437
3438//========================================
3439// Inline members
3440//========================================
3441
3442//========================================
3443// Privates
3444//========================================
3445
3446inline void
3447UnicodeString::pinIndex(int32_t& start) const
3448{
3449  // pin index
3450  if(start < 0) {
3451    start = 0;
3452  } else if(start > length()) {
3453    start = length();
3454  }
3455}
3456
3457inline void
3458UnicodeString::pinIndices(int32_t& start,
3459                          int32_t& _length) const
3460{
3461  // pin indices
3462  int32_t len = length();
3463  if(start < 0) {
3464    start = 0;
3465  } else if(start > len) {
3466    start = len;
3467  }
3468  if(_length < 0) {
3469    _length = 0;
3470  } else if(_length > (len - start)) {
3471    _length = (len - start);
3472  }
3473}
3474
3475inline UChar*
3476UnicodeString::getArrayStart()
3477{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3478
3479inline const UChar*
3480UnicodeString::getArrayStart() const
3481{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3482
3483//========================================
3484// Read-only implementation methods
3485//========================================
3486inline int32_t
3487UnicodeString::length() const
3488{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3489
3490inline int32_t
3491UnicodeString::getCapacity() const
3492{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3493
3494inline int32_t
3495UnicodeString::hashCode() const
3496{ return doHashCode(); }
3497
3498inline UBool
3499UnicodeString::isBogus() const
3500{ return (UBool)(fFlags & kIsBogus); }
3501
3502inline UBool
3503UnicodeString::isWritable() const
3504{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3505
3506inline UBool
3507UnicodeString::isBufferWritable() const
3508{
3509  return (UBool)(
3510      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3511      (!(fFlags&kRefCounted) || refCount()==1));
3512}
3513
3514inline const UChar *
3515UnicodeString::getBuffer() const {
3516  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3517    return 0;
3518  } else if(fFlags&kUsingStackBuffer) {
3519    return fUnion.fStackBuffer;
3520  } else {
3521    return fUnion.fFields.fArray;
3522  }
3523}
3524
3525//========================================
3526// Read-only alias methods
3527//========================================
3528inline int8_t
3529UnicodeString::doCompare(int32_t start,
3530              int32_t thisLength,
3531              const UnicodeString& srcText,
3532              int32_t srcStart,
3533              int32_t srcLength) const
3534{
3535  if(srcText.isBogus()) {
3536    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3537  } else {
3538    srcText.pinIndices(srcStart, srcLength);
3539    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3540  }
3541}
3542
3543inline UBool
3544UnicodeString::operator== (const UnicodeString& text) const
3545{
3546  if(isBogus()) {
3547    return text.isBogus();
3548  } else {
3549    int32_t len = length(), textLength = text.length();
3550    return
3551      !text.isBogus() &&
3552      len == textLength &&
3553      doCompare(0, len, text, 0, textLength) == 0;
3554  }
3555}
3556
3557inline UBool
3558UnicodeString::operator!= (const UnicodeString& text) const
3559{ return (! operator==(text)); }
3560
3561inline UBool
3562UnicodeString::operator> (const UnicodeString& text) const
3563{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3564
3565inline UBool
3566UnicodeString::operator< (const UnicodeString& text) const
3567{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3568
3569inline UBool
3570UnicodeString::operator>= (const UnicodeString& text) const
3571{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3572
3573inline UBool
3574UnicodeString::operator<= (const UnicodeString& text) const
3575{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3576
3577inline int8_t
3578UnicodeString::compare(const UnicodeString& text) const
3579{ return doCompare(0, length(), text, 0, text.length()); }
3580
3581inline int8_t
3582UnicodeString::compare(int32_t start,
3583               int32_t _length,
3584               const UnicodeString& srcText) const
3585{ return doCompare(start, _length, srcText, 0, srcText.length()); }
3586
3587inline int8_t
3588UnicodeString::compare(const UChar *srcChars,
3589               int32_t srcLength) const
3590{ return doCompare(0, length(), srcChars, 0, srcLength); }
3591
3592inline int8_t
3593UnicodeString::compare(int32_t start,
3594               int32_t _length,
3595               const UnicodeString& srcText,
3596               int32_t srcStart,
3597               int32_t srcLength) const
3598{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3599
3600inline int8_t
3601UnicodeString::compare(int32_t start,
3602               int32_t _length,
3603               const UChar *srcChars) const
3604{ return doCompare(start, _length, srcChars, 0, _length); }
3605
3606inline int8_t
3607UnicodeString::compare(int32_t start,
3608               int32_t _length,
3609               const UChar *srcChars,
3610               int32_t srcStart,
3611               int32_t srcLength) const
3612{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3613
3614inline int8_t
3615UnicodeString::compareBetween(int32_t start,
3616                  int32_t limit,
3617                  const UnicodeString& srcText,
3618                  int32_t srcStart,
3619                  int32_t srcLimit) const
3620{ return doCompare(start, limit - start,
3621           srcText, srcStart, srcLimit - srcStart); }
3622
3623inline int8_t
3624UnicodeString::doCompareCodePointOrder(int32_t start,
3625                                       int32_t thisLength,
3626                                       const UnicodeString& srcText,
3627                                       int32_t srcStart,
3628                                       int32_t srcLength) const
3629{
3630  if(srcText.isBogus()) {
3631    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3632  } else {
3633    srcText.pinIndices(srcStart, srcLength);
3634    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3635  }
3636}
3637
3638inline int8_t
3639UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3640{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3641
3642inline int8_t
3643UnicodeString::compareCodePointOrder(int32_t start,
3644                                     int32_t _length,
3645                                     const UnicodeString& srcText) const
3646{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3647
3648inline int8_t
3649UnicodeString::compareCodePointOrder(const UChar *srcChars,
3650                                     int32_t srcLength) const
3651{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3652
3653inline int8_t
3654UnicodeString::compareCodePointOrder(int32_t start,
3655                                     int32_t _length,
3656                                     const UnicodeString& srcText,
3657                                     int32_t srcStart,
3658                                     int32_t srcLength) const
3659{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3660
3661inline int8_t
3662UnicodeString::compareCodePointOrder(int32_t start,
3663                                     int32_t _length,
3664                                     const UChar *srcChars) const
3665{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3666
3667inline int8_t
3668UnicodeString::compareCodePointOrder(int32_t start,
3669                                     int32_t _length,
3670                                     const UChar *srcChars,
3671                                     int32_t srcStart,
3672                                     int32_t srcLength) const
3673{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3674
3675inline int8_t
3676UnicodeString::compareCodePointOrderBetween(int32_t start,
3677                                            int32_t limit,
3678                                            const UnicodeString& srcText,
3679                                            int32_t srcStart,
3680                                            int32_t srcLimit) const
3681{ return doCompareCodePointOrder(start, limit - start,
3682           srcText, srcStart, srcLimit - srcStart); }
3683
3684inline int8_t
3685UnicodeString::doCaseCompare(int32_t start,
3686                             int32_t thisLength,
3687                             const UnicodeString &srcText,
3688                             int32_t srcStart,
3689                             int32_t srcLength,
3690                             uint32_t options) const
3691{
3692  if(srcText.isBogus()) {
3693    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3694  } else {
3695    srcText.pinIndices(srcStart, srcLength);
3696    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3697  }
3698}
3699
3700inline int8_t
3701UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3702  return doCaseCompare(0, length(), text, 0, text.length(), options);
3703}
3704
3705inline int8_t
3706UnicodeString::caseCompare(int32_t start,
3707                           int32_t _length,
3708                           const UnicodeString &srcText,
3709                           uint32_t options) const {
3710  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3711}
3712
3713inline int8_t
3714UnicodeString::caseCompare(const UChar *srcChars,
3715                           int32_t srcLength,
3716                           uint32_t options) const {
3717  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3718}
3719
3720inline int8_t
3721UnicodeString::caseCompare(int32_t start,
3722                           int32_t _length,
3723                           const UnicodeString &srcText,
3724                           int32_t srcStart,
3725                           int32_t srcLength,
3726                           uint32_t options) const {
3727  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3728}
3729
3730inline int8_t
3731UnicodeString::caseCompare(int32_t start,
3732                           int32_t _length,
3733                           const UChar *srcChars,
3734                           uint32_t options) const {
3735  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3736}
3737
3738inline int8_t
3739UnicodeString::caseCompare(int32_t start,
3740                           int32_t _length,
3741                           const UChar *srcChars,
3742                           int32_t srcStart,
3743                           int32_t srcLength,
3744                           uint32_t options) const {
3745  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3746}
3747
3748inline int8_t
3749UnicodeString::caseCompareBetween(int32_t start,
3750                                  int32_t limit,
3751                                  const UnicodeString &srcText,
3752                                  int32_t srcStart,
3753                                  int32_t srcLimit,
3754                                  uint32_t options) const {
3755  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3756}
3757
3758inline int32_t
3759UnicodeString::indexOf(const UnicodeString& srcText,
3760               int32_t srcStart,
3761               int32_t srcLength,
3762               int32_t start,
3763               int32_t _length) const
3764{
3765  if(!srcText.isBogus()) {
3766    srcText.pinIndices(srcStart, srcLength);
3767    if(srcLength > 0) {
3768      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3769    }
3770  }
3771  return -1;
3772}
3773
3774inline int32_t
3775UnicodeString::indexOf(const UnicodeString& text) const
3776{ return indexOf(text, 0, text.length(), 0, length()); }
3777
3778inline int32_t
3779UnicodeString::indexOf(const UnicodeString& text,
3780               int32_t start) const {
3781  pinIndex(start);
3782  return indexOf(text, 0, text.length(), start, length() - start);
3783}
3784
3785inline int32_t
3786UnicodeString::indexOf(const UnicodeString& text,
3787               int32_t start,
3788               int32_t _length) const
3789{ return indexOf(text, 0, text.length(), start, _length); }
3790
3791inline int32_t
3792UnicodeString::indexOf(const UChar *srcChars,
3793               int32_t srcLength,
3794               int32_t start) const {
3795  pinIndex(start);
3796  return indexOf(srcChars, 0, srcLength, start, length() - start);
3797}
3798
3799inline int32_t
3800UnicodeString::indexOf(const UChar *srcChars,
3801               int32_t srcLength,
3802               int32_t start,
3803               int32_t _length) const
3804{ return indexOf(srcChars, 0, srcLength, start, _length); }
3805
3806inline int32_t
3807UnicodeString::indexOf(UChar c,
3808               int32_t start,
3809               int32_t _length) const
3810{ return doIndexOf(c, start, _length); }
3811
3812inline int32_t
3813UnicodeString::indexOf(UChar32 c,
3814               int32_t start,
3815               int32_t _length) const
3816{ return doIndexOf(c, start, _length); }
3817
3818inline int32_t
3819UnicodeString::indexOf(UChar c) const
3820{ return doIndexOf(c, 0, length()); }
3821
3822inline int32_t
3823UnicodeString::indexOf(UChar32 c) const
3824{ return indexOf(c, 0, length()); }
3825
3826inline int32_t
3827UnicodeString::indexOf(UChar c,
3828               int32_t start) const {
3829  pinIndex(start);
3830  return doIndexOf(c, start, length() - start);
3831}
3832
3833inline int32_t
3834UnicodeString::indexOf(UChar32 c,
3835               int32_t start) const {
3836  pinIndex(start);
3837  return indexOf(c, start, length() - start);
3838}
3839
3840inline int32_t
3841UnicodeString::lastIndexOf(const UChar *srcChars,
3842               int32_t srcLength,
3843               int32_t start,
3844               int32_t _length) const
3845{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3846
3847inline int32_t
3848UnicodeString::lastIndexOf(const UChar *srcChars,
3849               int32_t srcLength,
3850               int32_t start) const {
3851  pinIndex(start);
3852  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3853}
3854
3855inline int32_t
3856UnicodeString::lastIndexOf(const UnicodeString& srcText,
3857               int32_t srcStart,
3858               int32_t srcLength,
3859               int32_t start,
3860               int32_t _length) const
3861{
3862  if(!srcText.isBogus()) {
3863    srcText.pinIndices(srcStart, srcLength);
3864    if(srcLength > 0) {
3865      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3866    }
3867  }
3868  return -1;
3869}
3870
3871inline int32_t
3872UnicodeString::lastIndexOf(const UnicodeString& text,
3873               int32_t start,
3874               int32_t _length) const
3875{ return lastIndexOf(text, 0, text.length(), start, _length); }
3876
3877inline int32_t
3878UnicodeString::lastIndexOf(const UnicodeString& text,
3879               int32_t start) const {
3880  pinIndex(start);
3881  return lastIndexOf(text, 0, text.length(), start, length() - start);
3882}
3883
3884inline int32_t
3885UnicodeString::lastIndexOf(const UnicodeString& text) const
3886{ return lastIndexOf(text, 0, text.length(), 0, length()); }
3887
3888inline int32_t
3889UnicodeString::lastIndexOf(UChar c,
3890               int32_t start,
3891               int32_t _length) const
3892{ return doLastIndexOf(c, start, _length); }
3893
3894inline int32_t
3895UnicodeString::lastIndexOf(UChar32 c,
3896               int32_t start,
3897               int32_t _length) const {
3898  return doLastIndexOf(c, start, _length);
3899}
3900
3901inline int32_t
3902UnicodeString::lastIndexOf(UChar c) const
3903{ return doLastIndexOf(c, 0, length()); }
3904
3905inline int32_t
3906UnicodeString::lastIndexOf(UChar32 c) const {
3907  return lastIndexOf(c, 0, length());
3908}
3909
3910inline int32_t
3911UnicodeString::lastIndexOf(UChar c,
3912               int32_t start) const {
3913  pinIndex(start);
3914  return doLastIndexOf(c, start, length() - start);
3915}
3916
3917inline int32_t
3918UnicodeString::lastIndexOf(UChar32 c,
3919               int32_t start) const {
3920  pinIndex(start);
3921  return lastIndexOf(c, start, length() - start);
3922}
3923
3924inline UBool
3925UnicodeString::startsWith(const UnicodeString& text) const
3926{ return compare(0, text.length(), text, 0, text.length()) == 0; }
3927
3928inline UBool
3929UnicodeString::startsWith(const UnicodeString& srcText,
3930              int32_t srcStart,
3931              int32_t srcLength) const
3932{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3933
3934inline UBool
3935UnicodeString::startsWith(const UChar *srcChars,
3936              int32_t srcLength) const
3937{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
3938
3939inline UBool
3940UnicodeString::startsWith(const UChar *srcChars,
3941              int32_t srcStart,
3942              int32_t srcLength) const
3943{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
3944
3945inline UBool
3946UnicodeString::endsWith(const UnicodeString& text) const
3947{ return doCompare(length() - text.length(), text.length(),
3948           text, 0, text.length()) == 0; }
3949
3950inline UBool
3951UnicodeString::endsWith(const UnicodeString& srcText,
3952            int32_t srcStart,
3953            int32_t srcLength) const {
3954  srcText.pinIndices(srcStart, srcLength);
3955  return doCompare(length() - srcLength, srcLength,
3956                   srcText, srcStart, srcLength) == 0;
3957}
3958
3959inline UBool
3960UnicodeString::endsWith(const UChar *srcChars,
3961            int32_t srcLength) const {
3962  if(srcLength < 0) {
3963    srcLength = u_strlen(srcChars);
3964  }
3965  return doCompare(length() - srcLength, srcLength,
3966                   srcChars, 0, srcLength) == 0;
3967}
3968
3969inline UBool
3970UnicodeString::endsWith(const UChar *srcChars,
3971            int32_t srcStart,
3972            int32_t srcLength) const {
3973  if(srcLength < 0) {
3974    srcLength = u_strlen(srcChars + srcStart);
3975  }
3976  return doCompare(length() - srcLength, srcLength,
3977                   srcChars, srcStart, srcLength) == 0;
3978}
3979
3980//========================================
3981// replace
3982//========================================
3983inline UnicodeString&
3984UnicodeString::replace(int32_t start,
3985               int32_t _length,
3986               const UnicodeString& srcText)
3987{ return doReplace(start, _length, srcText, 0, srcText.length()); }
3988
3989inline UnicodeString&
3990UnicodeString::replace(int32_t start,
3991               int32_t _length,
3992               const UnicodeString& srcText,
3993               int32_t srcStart,
3994               int32_t srcLength)
3995{ return doReplace(start, _length, srcText, srcStart, srcLength); }
3996
3997inline UnicodeString&
3998UnicodeString::replace(int32_t start,
3999               int32_t _length,
4000               const UChar *srcChars,
4001               int32_t srcLength)
4002{ return doReplace(start, _length, srcChars, 0, srcLength); }
4003
4004inline UnicodeString&
4005UnicodeString::replace(int32_t start,
4006               int32_t _length,
4007               const UChar *srcChars,
4008               int32_t srcStart,
4009               int32_t srcLength)
4010{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4011
4012inline UnicodeString&
4013UnicodeString::replace(int32_t start,
4014               int32_t _length,
4015               UChar srcChar)
4016{ return doReplace(start, _length, &srcChar, 0, 1); }
4017
4018inline UnicodeString&
4019UnicodeString::replace(int32_t start,
4020               int32_t _length,
4021               UChar32 srcChar) {
4022  UChar buffer[U16_MAX_LENGTH];
4023  int32_t count = 0;
4024  UBool isError = FALSE;
4025  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
4026  return doReplace(start, _length, buffer, 0, count);
4027}
4028
4029inline UnicodeString&
4030UnicodeString::replaceBetween(int32_t start,
4031                  int32_t limit,
4032                  const UnicodeString& srcText)
4033{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4034
4035inline UnicodeString&
4036UnicodeString::replaceBetween(int32_t start,
4037                  int32_t limit,
4038                  const UnicodeString& srcText,
4039                  int32_t srcStart,
4040                  int32_t srcLimit)
4041{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4042
4043inline UnicodeString&
4044UnicodeString::findAndReplace(const UnicodeString& oldText,
4045                  const UnicodeString& newText)
4046{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4047            newText, 0, newText.length()); }
4048
4049inline UnicodeString&
4050UnicodeString::findAndReplace(int32_t start,
4051                  int32_t _length,
4052                  const UnicodeString& oldText,
4053                  const UnicodeString& newText)
4054{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4055            newText, 0, newText.length()); }
4056
4057// ============================
4058// extract
4059// ============================
4060inline void
4061UnicodeString::doExtract(int32_t start,
4062             int32_t _length,
4063             UnicodeString& target) const
4064{ target.replace(0, target.length(), *this, start, _length); }
4065
4066inline void
4067UnicodeString::extract(int32_t start,
4068               int32_t _length,
4069               UChar *target,
4070               int32_t targetStart) const
4071{ doExtract(start, _length, target, targetStart); }
4072
4073inline void
4074UnicodeString::extract(int32_t start,
4075               int32_t _length,
4076               UnicodeString& target) const
4077{ doExtract(start, _length, target); }
4078
4079#if !UCONFIG_NO_CONVERSION
4080
4081inline int32_t
4082UnicodeString::extract(int32_t start,
4083               int32_t _length,
4084               char *dst,
4085               const char *codepage) const
4086
4087{
4088  // This dstSize value will be checked explicitly
4089  // Removed #if defined(__GNUC__) per ICU defect http://bugs.icu-project.org/trac/ticket/8197
4090  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4091}
4092#endif
4093
4094inline void
4095UnicodeString::extractBetween(int32_t start,
4096                  int32_t limit,
4097                  UChar *dst,
4098                  int32_t dstStart) const {
4099  pinIndex(start);
4100  pinIndex(limit);
4101  doExtract(start, limit - start, dst, dstStart);
4102}
4103
4104inline UnicodeString
4105UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4106    return tempSubString(start, limit - start);
4107}
4108
4109inline UChar
4110UnicodeString::doCharAt(int32_t offset) const
4111{
4112  if((uint32_t)offset < (uint32_t)length()) {
4113    return getArrayStart()[offset];
4114  } else {
4115    return kInvalidUChar;
4116  }
4117}
4118
4119inline UChar
4120UnicodeString::charAt(int32_t offset) const
4121{ return doCharAt(offset); }
4122
4123inline UChar
4124UnicodeString::operator[] (int32_t offset) const
4125{ return doCharAt(offset); }
4126
4127inline UChar32
4128UnicodeString::char32At(int32_t offset) const
4129{
4130  int32_t len = length();
4131  if((uint32_t)offset < (uint32_t)len) {
4132    const UChar *array = getArrayStart();
4133    UChar32 c;
4134    U16_GET(array, 0, offset, len, c);
4135    return c;
4136  } else {
4137    return kInvalidUChar;
4138  }
4139}
4140
4141inline int32_t
4142UnicodeString::getChar32Start(int32_t offset) const {
4143  if((uint32_t)offset < (uint32_t)length()) {
4144    const UChar *array = getArrayStart();
4145    U16_SET_CP_START(array, 0, offset);
4146    return offset;
4147  } else {
4148    return 0;
4149  }
4150}
4151
4152inline int32_t
4153UnicodeString::getChar32Limit(int32_t offset) const {
4154  int32_t len = length();
4155  if((uint32_t)offset < (uint32_t)len) {
4156    const UChar *array = getArrayStart();
4157    U16_SET_CP_LIMIT(array, 0, offset, len);
4158    return offset;
4159  } else {
4160    return len;
4161  }
4162}
4163
4164inline UBool
4165UnicodeString::isEmpty() const {
4166  return fShortLength == 0;
4167}
4168
4169//========================================
4170// Write implementation methods
4171//========================================
4172inline void
4173UnicodeString::setLength(int32_t len) {
4174  if(len <= 127) {
4175    fShortLength = (int8_t)len;
4176  } else {
4177    fShortLength = (int8_t)-1;
4178    fUnion.fFields.fLength = len;
4179  }
4180}
4181
4182inline void
4183UnicodeString::setToEmpty() {
4184  fShortLength = 0;
4185  fFlags = kShortString;
4186}
4187
4188inline void
4189UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4190  setLength(len);
4191  fUnion.fFields.fArray = array;
4192  fUnion.fFields.fCapacity = capacity;
4193}
4194
4195inline const UChar *
4196UnicodeString::getTerminatedBuffer() {
4197  if(!isWritable()) {
4198    return 0;
4199  } else {
4200    UChar *array = getArrayStart();
4201    int32_t len = length();
4202    if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4203      /*
4204       * kRefCounted: Do not write the NUL if the buffer is shared.
4205       * That is mostly safe, except when the length of one copy was modified
4206       * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4207       * Then the NUL would be written into the middle of another copy's string.
4208       */
4209      if(!(fFlags&kBufferIsReadonly)) {
4210        /*
4211         * We must not write to a readonly buffer, but it is known to be
4212         * NUL-terminated if len<capacity.
4213         * A shared, allocated buffer (refCount()>1) must not have its contents
4214         * modified, but the NUL at [len] is beyond the string contents,
4215         * and multiple string objects and threads writing the same NUL into the
4216         * same location is harmless.
4217         * In all other cases, the buffer is fully writable and it is anyway safe
4218         * to write the NUL.
4219         *
4220         * Note: An earlier version of this code tested whether there is a NUL
4221         * at [len] already, but, while safe, it generated lots of warnings from
4222         * tools like valgrind and Purify.
4223         */
4224        array[len] = 0;
4225      }
4226      return array;
4227    } else if(cloneArrayIfNeeded(len+1)) {
4228      array = getArrayStart();
4229      array[len] = 0;
4230      return array;
4231    } else {
4232      return 0;
4233    }
4234  }
4235}
4236
4237inline UnicodeString&
4238UnicodeString::operator= (UChar ch)
4239{ return doReplace(0, length(), &ch, 0, 1); }
4240
4241inline UnicodeString&
4242UnicodeString::operator= (UChar32 ch)
4243{ return replace(0, length(), ch); }
4244
4245inline UnicodeString&
4246UnicodeString::setTo(const UnicodeString& srcText,
4247             int32_t srcStart,
4248             int32_t srcLength)
4249{
4250  unBogus();
4251  return doReplace(0, length(), srcText, srcStart, srcLength);
4252}
4253
4254inline UnicodeString&
4255UnicodeString::setTo(const UnicodeString& srcText,
4256             int32_t srcStart)
4257{
4258  unBogus();
4259  srcText.pinIndex(srcStart);
4260  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4261}
4262
4263inline UnicodeString&
4264UnicodeString::setTo(const UnicodeString& srcText)
4265{
4266  unBogus();
4267  return doReplace(0, length(), srcText, 0, srcText.length());
4268}
4269
4270inline UnicodeString&
4271UnicodeString::setTo(const UChar *srcChars,
4272             int32_t srcLength)
4273{
4274  unBogus();
4275  return doReplace(0, length(), srcChars, 0, srcLength);
4276}
4277
4278inline UnicodeString&
4279UnicodeString::setTo(UChar srcChar)
4280{
4281  unBogus();
4282  return doReplace(0, length(), &srcChar, 0, 1);
4283}
4284
4285inline UnicodeString&
4286UnicodeString::setTo(UChar32 srcChar)
4287{
4288  unBogus();
4289  return replace(0, length(), srcChar);
4290}
4291
4292inline UnicodeString&
4293UnicodeString::append(const UnicodeString& srcText,
4294              int32_t srcStart,
4295              int32_t srcLength)
4296{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
4297
4298inline UnicodeString&
4299UnicodeString::append(const UnicodeString& srcText)
4300{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4301
4302inline UnicodeString&
4303UnicodeString::append(const UChar *srcChars,
4304              int32_t srcStart,
4305              int32_t srcLength)
4306{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4307
4308inline UnicodeString&
4309UnicodeString::append(const UChar *srcChars,
4310              int32_t srcLength)
4311{ return doReplace(length(), 0, srcChars, 0, srcLength); }
4312
4313inline UnicodeString&
4314UnicodeString::append(UChar srcChar)
4315{ return doReplace(length(), 0, &srcChar, 0, 1); }
4316
4317inline UnicodeString&
4318UnicodeString::append(UChar32 srcChar) {
4319  UChar buffer[U16_MAX_LENGTH];
4320  int32_t _length = 0;
4321  UBool isError = FALSE;
4322  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4323  return doReplace(length(), 0, buffer, 0, _length);
4324}
4325
4326inline UnicodeString&
4327UnicodeString::operator+= (UChar ch)
4328{ return doReplace(length(), 0, &ch, 0, 1); }
4329
4330inline UnicodeString&
4331UnicodeString::operator+= (UChar32 ch) {
4332  return append(ch);
4333}
4334
4335inline UnicodeString&
4336UnicodeString::operator+= (const UnicodeString& srcText)
4337{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4338
4339inline UnicodeString&
4340UnicodeString::insert(int32_t start,
4341              const UnicodeString& srcText,
4342              int32_t srcStart,
4343              int32_t srcLength)
4344{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4345
4346inline UnicodeString&
4347UnicodeString::insert(int32_t start,
4348              const UnicodeString& srcText)
4349{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4350
4351inline UnicodeString&
4352UnicodeString::insert(int32_t start,
4353              const UChar *srcChars,
4354              int32_t srcStart,
4355              int32_t srcLength)
4356{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4357
4358inline UnicodeString&
4359UnicodeString::insert(int32_t start,
4360              const UChar *srcChars,
4361              int32_t srcLength)
4362{ return doReplace(start, 0, srcChars, 0, srcLength); }
4363
4364inline UnicodeString&
4365UnicodeString::insert(int32_t start,
4366              UChar srcChar)
4367{ return doReplace(start, 0, &srcChar, 0, 1); }
4368
4369inline UnicodeString&
4370UnicodeString::insert(int32_t start,
4371              UChar32 srcChar)
4372{ return replace(start, 0, srcChar); }
4373
4374
4375inline UnicodeString&
4376UnicodeString::remove()
4377{
4378  // remove() of a bogus string makes the string empty and non-bogus
4379  // we also un-alias a read-only alias to deal with NUL-termination
4380  // issues with getTerminatedBuffer()
4381  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4382    setToEmpty();
4383  } else {
4384    fShortLength = 0;
4385  }
4386  return *this;
4387}
4388
4389inline UnicodeString&
4390UnicodeString::remove(int32_t start,
4391             int32_t _length)
4392{
4393    if(start <= 0 && _length == INT32_MAX) {
4394        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4395        return remove();
4396    }
4397    return doReplace(start, _length, NULL, 0, 0);
4398}
4399
4400inline UnicodeString&
4401UnicodeString::removeBetween(int32_t start,
4402                int32_t limit)
4403{ return doReplace(start, limit - start, NULL, 0, 0); }
4404
4405inline UnicodeString &
4406UnicodeString::retainBetween(int32_t start, int32_t limit) {
4407  truncate(limit);
4408  return doReplace(0, start, NULL, 0, 0);
4409}
4410
4411inline UBool
4412UnicodeString::truncate(int32_t targetLength)
4413{
4414  if(isBogus() && targetLength == 0) {
4415    // truncate(0) of a bogus string makes the string empty and non-bogus
4416    unBogus();
4417    return FALSE;
4418  } else if((uint32_t)targetLength < (uint32_t)length()) {
4419    setLength(targetLength);
4420    if(fFlags&kBufferIsReadonly) {
4421      fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
4422    }
4423    return TRUE;
4424  } else {
4425    return FALSE;
4426  }
4427}
4428
4429inline UnicodeString&
4430UnicodeString::reverse()
4431{ return doReverse(0, length()); }
4432
4433inline UnicodeString&
4434UnicodeString::reverse(int32_t start,
4435               int32_t _length)
4436{ return doReverse(start, _length); }
4437
4438U_NAMESPACE_END
4439
4440#endif
4441