1/********************************************************************
2 * Copyright (c) 1999-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *   Date        Name        Description
6 *   12/14/99    Madhu        Creation.
7 *   01/12/2000  Madhu        updated for changed API
8 ********************************************************************/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/uchar.h"
15#include "intltest.h"
16#include "unicode/rbbi.h"
17#include "unicode/schriter.h"
18#include "rbbiapts.h"
19#include "rbbidata.h"
20#include "cstring.h"
21#include "ubrkimpl.h"
22#include "unicode/locid.h"
23#include "unicode/ustring.h"
24#include "unicode/utext.h"
25#include "cmemory.h"
26#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
27#include "unicode/filteredbrk.h"
28#include <stdio.h> // for sprintf
29#endif
30/**
31 * API Test the RuleBasedBreakIterator class
32 */
33
34
// When `status` holds an ICU failure code, report it through dataerrln()
// together with the source file and line of the macro invocation.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}
37
// When `expr` compares equal to FALSE, report a test failure through errln(),
// quoting the expression text and the source file and line of the invocation.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
40
41void RBBIAPITest::TestCloneEquals()
42{
43
44    UErrorCode status=U_ZERO_ERROR;
45    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
46    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
47    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
49    if(U_FAILURE(status)){
50        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
51        return;
52    }
53
54
55    UnicodeString testString="Testing word break iterators's clone() and equals()";
56    bi1->setText(testString);
57    bi2->setText(testString);
58    biequal->setText(testString);
59
60    bi3->setText("hello");
61
62    logln((UnicodeString)"Testing equals()");
63
64    logln((UnicodeString)"Testing == and !=");
65    UBool b = (*bi1 != *biequal);
66    b |= *bi1 == *bi2;
67    b |= *bi1 == *bi3;
68    if (b) {
69        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
70    }
71
72    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
73        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
74
75
76    // Quick test of RulesBasedBreakIterator assignment -
77    // Check that
78    //    two different iterators are !=
79    //    they are == after assignment
80    //    source and dest iterator produce the same next() after assignment.
81    //    deleting one doesn't disable the other.
82    logln("Testing assignment");
83    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
84    if(U_FAILURE(status)){
85        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
86        return;
87    }
88
89    RuleBasedBreakIterator biDefault, biDefault2;
90    if(U_FAILURE(status)){
91        errln((UnicodeString)"FAIL : in construction of default iterator");
92        return;
93    }
94    if (biDefault == *bix) {
95        errln((UnicodeString)"ERROR: iterators should not compare ==");
96        return;
97    }
98    if (biDefault != biDefault2) {
99        errln((UnicodeString)"ERROR: iterators should compare ==");
100        return;
101    }
102
103
104    UnicodeString   HelloString("Hello Kitty");
105    bix->setText(HelloString);
106    if (*bix == *bi2) {
107        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
108    }
109    *bix = *bi2;
110    if (*bix != *bi2) {
111        errln(UnicodeString("ERROR: strings should be equal before assignment."));
112    }
113
114    int bixnext = bix->next();
115    int bi2next = bi2->next();
116    if (! (bixnext == bi2next && bixnext == 7)) {
117        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
118    }
119    delete bix;
120    if (bi2->next() != 8) {
121        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
122    }
123
124
125
126    logln((UnicodeString)"Testing clone()");
127    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
128    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
129
130    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
131      *bi1clone == *bi3 || *bi1clone == *bi2)
132        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
133
134    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
135       *bi2clone == *bi3 || *bi2clone != *bi2)
136        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
137
138    if(bi1->getText() != bi1clone->getText()   ||
139       bi2clone->getText() != bi2->getText()   ||
140       *bi2clone == *bi1clone )
141        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
142
143    delete bi1clone;
144    delete bi2clone;
145    delete bi1;
146    delete bi3;
147    delete bi2;
148    delete biequal;
149}
150
151void RBBIAPITest::TestBoilerPlate()
152{
153    UErrorCode status = U_ZERO_ERROR;
154    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
155    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
156    if (U_FAILURE(status)) {
157        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
158        return;
159    }
160    if(*a!=*b){
161        errln("Failed: boilerplate method operator!= does not return correct results");
162    }
163    // Japanese word break iterators are identical to root with
164    // a dictionary-based break iterator
165    BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
166    BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
167    if(c && d){
168        if(*c!=*d){
169            errln("Failed: boilerplate method operator== does not return correct results");
170        }
171    }else{
172        errln("creation of break iterator failed");
173    }
174    delete a;
175    delete b;
176    delete c;
177    delete d;
178}
179
180void RBBIAPITest::TestgetRules()
181{
182    UErrorCode status=U_ZERO_ERROR;
183
184    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
185    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
186    if(U_FAILURE(status)){
187        errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
188        delete bi1;
189        delete bi2;
190        return;
191    }
192
193
194
195    logln((UnicodeString)"Testing toString()");
196
197    bi1->setText((UnicodeString)"Hello there");
198
199    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
200
201    UnicodeString temp=bi1->getRules();
202    UnicodeString temp2=bi2->getRules();
203    UnicodeString temp3=bi3->getRules();
204    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
205        errln((UnicodeString)"ERROR: error in getRules() method");
206
207    delete bi1;
208    delete bi2;
209    delete bi3;
210}
211void RBBIAPITest::TestHashCode()
212{
213    UErrorCode status=U_ZERO_ERROR;
214    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
215    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
216    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
217    if(U_FAILURE(status)){
218        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
219        delete bi1;
220        delete bi2;
221        delete bi3;
222        return;
223    }
224
225
226    logln((UnicodeString)"Testing hashCode()");
227
228    bi1->setText((UnicodeString)"Hash code");
229    bi2->setText((UnicodeString)"Hash code");
230    bi3->setText((UnicodeString)"Hash code");
231
232    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
233    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
234
235    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
236        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
237        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
238
239    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
240        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
241        errln((UnicodeString)"ERROR: different objects have same hashcodes");
242
243    delete bi1clone;
244    delete bi2clone;
245    delete bi1;
246    delete bi2;
247    delete bi3;
248
249}
250void RBBIAPITest::TestGetSetAdoptText()
251{
252    logln((UnicodeString)"Testing getText setText ");
253    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
254    UnicodeString str1="first string.";
255    UnicodeString str2="Second string.";
256    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
257    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
258    if(status.isFailure()){
259        errcheckln(status, "Fail : in construction - %s", status.errorName());
260            return;
261    }
262
263
264    CharacterIterator* text1= new StringCharacterIterator(str1);
265    CharacterIterator* text1Clone = text1->clone();
266    CharacterIterator* text2= new StringCharacterIterator(str2);
267    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
268
269    wordIter1->setText(str1);
270    CharacterIterator *tci = &wordIter1->getText();
271    UnicodeString      tstr;
272    tci->getText(tstr);
273    TEST_ASSERT(tstr == str1);
274    if(wordIter1->current() != 0)
275        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
276
277    wordIter1->next(2);
278
279    wordIter1->setText(str2);
280    if(wordIter1->current() != 0)
281        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
282
283
284    charIter1->adoptText(text1Clone);
285    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
286    tci = &wordIter1->getText();
287    tci->getText(tstr);
288    TEST_ASSERT(tstr == str2);
289    tci = &charIter1->getText();
290    tci->getText(tstr);
291    TEST_ASSERT(tstr == str1);
292
293
294    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
295    rb->adoptText(text1);
296    if(rb->getText() != *text1)
297        errln((UnicodeString)"ERROR:1 error in adoptText ");
298    rb->adoptText(text2);
299    if(rb->getText() != *text2)
300        errln((UnicodeString)"ERROR:2 error in adoptText ");
301
302    // Adopt where iterator range is less than the entire orignal source string.
303    //   (With the change of the break engine to working with UText internally,
304    //    CharacterIterators starting at positions other than zero are not supported)
305    rb->adoptText(text3);
306    TEST_ASSERT(rb->preceding(2) == 0);
307    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
308    //if(rb->preceding(2) != 3) {
309    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
310    //}
311    //if(rb->following(11) != BreakIterator::DONE) {
312    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
313    //}
314
315    // UText API
316    //
317    //   Quick test to see if UText is working at all.
318    //
319    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
320    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
321    //                012345678901
322
323    status.reset();
324    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
325    wordIter1->setText(ut.getAlias(), status);
326    TEST_ASSERT_SUCCESS(status);
327
328    int32_t pos;
329    pos = wordIter1->first();
330    TEST_ASSERT(pos==0);
331    pos = wordIter1->next();
332    TEST_ASSERT(pos==5);
333    pos = wordIter1->next();
334    TEST_ASSERT(pos==6);
335    pos = wordIter1->next();
336    TEST_ASSERT(pos==11);
337    pos = wordIter1->next();
338    TEST_ASSERT(pos==UBRK_DONE);
339
340    status.reset();
341    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
342    TEST_ASSERT_SUCCESS(status);
343    wordIter1->setText(ut2.getAlias(), status);
344    TEST_ASSERT_SUCCESS(status);
345
346    pos = wordIter1->first();
347    TEST_ASSERT(pos==0);
348    pos = wordIter1->next();
349    TEST_ASSERT(pos==3);
350    pos = wordIter1->next();
351    TEST_ASSERT(pos==4);
352
353    pos = wordIter1->last();
354    TEST_ASSERT(pos==6);
355    pos = wordIter1->previous();
356    TEST_ASSERT(pos==4);
357    pos = wordIter1->previous();
358    TEST_ASSERT(pos==3);
359    pos = wordIter1->previous();
360    TEST_ASSERT(pos==0);
361    pos = wordIter1->previous();
362    TEST_ASSERT(pos==UBRK_DONE);
363
364    status.reset();
365    UnicodeString sEmpty;
366    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
367    wordIter1->getUText(gut2.getAlias(), status);
368    TEST_ASSERT_SUCCESS(status);
369    status.reset();
370}
371
372
373void RBBIAPITest::TestIteration()
374{
375    // This test just verifies that the API is present.
376    // Testing for correct operation of the break rules happens elsewhere.
377
378    UErrorCode status=U_ZERO_ERROR;
379    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
380    if (U_FAILURE(status) || bi == NULL)  {
381        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
382    }
383    delete bi;
384
385    status=U_ZERO_ERROR;
386    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
387    if (U_FAILURE(status) || bi == NULL)  {
388        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
389    }
390    delete bi;
391
392    status=U_ZERO_ERROR;
393    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
394    if (U_FAILURE(status) || bi == NULL)  {
395        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
396    }
397    delete bi;
398
399    status=U_ZERO_ERROR;
400    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
401    if (U_FAILURE(status) || bi == NULL)  {
402        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
403    }
404    delete bi;
405
406    status=U_ZERO_ERROR;
407    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
408    if (U_FAILURE(status) || bi == NULL)  {
409        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
410    }
411    delete bi;
412
413    status=U_ZERO_ERROR;
414    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
415    if (U_FAILURE(status) || bi == NULL)  {
416        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
417        return;   // Skip the rest of these tests.
418    }
419
420
421    UnicodeString testString="0123456789";
422    bi->setText(testString);
423
424    int32_t i;
425    i = bi->first();
426    if (i != 0) {
427        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
428    }
429
430    i = bi->last();
431    if (i != 10) {
432        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
433    }
434
435    //
436    // Previous
437    //
438    bi->last();
439    i = bi->previous();
440    if (i != 9) {
441        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
442    }
443
444
445    bi->first();
446    i = bi->previous();
447    if (i != BreakIterator::DONE) {
448        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
449    }
450
451    //
452    // next()
453    //
454    bi->first();
455    i = bi->next();
456    if (i != 1) {
457        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
458    }
459
460    bi->last();
461    i = bi->next();
462    if (i != BreakIterator::DONE) {
463        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
464    }
465
466
467    //
468    //  current()
469    //
470    bi->first();
471    i = bi->current();
472    if (i != 0) {
473        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
474    }
475
476    bi->next();
477    i = bi->current();
478    if (i != 1) {
479        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
480    }
481
482    bi->last();
483    bi->next();
484    i = bi->current();
485    if (i != 10) {
486        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
487    }
488
489    bi->first();
490    bi->previous();
491    i = bi->current();
492    if (i != 0) {
493        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
494    }
495
496
497    //
498    // Following()
499    //
500    i = bi->following(4);
501    if (i != 5) {
502        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
503    }
504
505    i = bi->following(9);
506    if (i != 10) {
507        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
508    }
509
510    i = bi->following(10);
511    if (i != BreakIterator::DONE) {
512        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
513    }
514
515
516    //
517    // Preceding
518    //
519    i = bi->preceding(4);
520    if (i != 3) {
521        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
522    }
523
524    i = bi->preceding(10);
525    if (i != 9) {
526        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
527    }
528
529    i = bi->preceding(1);
530    if (i != 0) {
531        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
532    }
533
534    i = bi->preceding(0);
535    if (i != BreakIterator::DONE) {
536        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
537    }
538
539
540    //
541    // isBoundary()
542    //
543    bi->first();
544    if (bi->isBoundary(3) != TRUE) {
545        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
546    }
547    i = bi->current();
548    if (i != 3) {
549        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
550    }
551
552
553    if (bi->isBoundary(11) != FALSE) {
554        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
555    }
556    i = bi->current();
557    if (i != 10) {
558        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
559    }
560
561    //
562    // next(n)
563    //
564    bi->first();
565    i = bi->next(4);
566    if (i != 4) {
567        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
568    }
569
570    i = bi->next(6);
571    if (i != 10) {
572        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
573    }
574
575    bi->first();
576    i = bi->next(11);
577    if (i != BreakIterator::DONE) {
578        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
579    }
580
581    delete bi;
582
583}
584
585
586
587
588
589
590void RBBIAPITest::TestBuilder() {
591     UnicodeString rulesString1 = "$Letters = [:L:];\n"
592                                  "$Numbers = [:N:];\n"
593                                  "$Letters+;\n"
594                                  "$Numbers+;\n"
595                                  "[^$Letters $Numbers];\n"
596                                  "!.*;\n";
597     UnicodeString testString1  = "abc123..abc";
598                                // 01234567890
599     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600     UErrorCode status=U_ZERO_ERROR;
601     UParseError    parseError;
602
603     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
604     if(U_FAILURE(status)) {
605         dataerrln("Fail : in construction - %s", u_errorName(status));
606     } else {
607         bi->setText(testString1);
608         doBoundaryTest(*bi, testString1, bounds1);
609     }
610     delete bi;
611}
612
613
614//
615//  TestQuoteGrouping
616//       Single quotes within rules imply a grouping, so that a modifier
617//       following the quoted text (* or +) applies to all of the quoted chars.
618//
619void RBBIAPITest::TestQuoteGrouping() {
620     UnicodeString rulesString1 = "#Here comes the rule...\n"
621                                  "'$@!'*;\n"   //  (\$\@\!)*
622                                  ".;\n";
623
624     UnicodeString testString1  = "$@!$@!X$@!!X";
625                                // 0123456789012
626     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627     UErrorCode status=U_ZERO_ERROR;
628     UParseError    parseError;
629
630     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
631     if(U_FAILURE(status)) {
632         dataerrln("Fail : in construction - %s", u_errorName(status));
633     } else {
634         bi->setText(testString1);
635         doBoundaryTest(*bi, testString1, bounds1);
636     }
637     delete bi;
638}
639
640//
641//  TestRuleStatus
642//      Test word break rule status constants.
643//
644void RBBIAPITest::TestRuleStatus() {
645     UChar str[30];
646     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
647     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
648     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
649              // 012345678901234567  8      9    0
650              //                     Katakana
651                str, 30);
652     UnicodeString testString1(str);
653     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
654     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
655                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
656                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};
657
658     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
659                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
660                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
661
662     UErrorCode status=U_ZERO_ERROR;
663
664     BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
665     if(U_FAILURE(status)) {
666         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
667     } else {
668         bi->setText(testString1);
669         // First test that the breaks are in the right spots.
670         doBoundaryTest(*bi, testString1, bounds1);
671
672         // Then go back and check tag values
673         int32_t i = 0;
674         int32_t pos, tag;
675         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
676             if (pos != bounds1[i]) {
677                 errln("FAIL: unexpected word break at postion %d", pos);
678                 break;
679             }
680             tag = bi->getRuleStatus();
681             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683                 break;
684             }
685
686             // Check that we get the same tag values from getRuleStatusVec()
687             int32_t vec[10];
688             int t = bi->getRuleStatusVec(vec, 10, status);
689             TEST_ASSERT_SUCCESS(status);
690             TEST_ASSERT(t==1);
691             TEST_ASSERT(vec[0] == tag);
692         }
693     }
694     delete bi;
695
696     // Now test line break status.  This test mostly is to confirm that the status constants
697     //                              are correctly declared in the header.
698     testString1 =   "test line. \n";
699     // break type    s    s     h
700
701     bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
702     if(U_FAILURE(status)) {
703         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
704     } else {
705         int32_t i = 0;
706         int32_t pos, tag;
707         UBool   success;
708
709         bi->setText(testString1);
710         pos = bi->current();
711         tag = bi->getRuleStatus();
712         for (i=0; i<3; i++) {
713             switch (i) {
714             case 0:
715                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
716             case 1:
717                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
718             case 2:
719                 success = pos==12 && tag==UBRK_LINE_HARD; break;
720             default:
721                 success = FALSE; break;
722             }
723             if (success == FALSE) {
724                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
725                     i, pos, tag);
726                 break;
727             }
728             pos = bi->next();
729             tag = bi->getRuleStatus();
730         }
731         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733             (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
734             errln("UBRK_LINE_* constants from header are inconsistent.");
735         }
736     }
737     delete bi;
738
739}
740
741
742//
743//  TestRuleStatusVec
744//      Test the vector form of  break rule status.
745//
746void RBBIAPITest::TestRuleStatusVec() {
747    UnicodeString rulesString(   "[A-N]{100}; \n"
748                                 "[a-w]{200}; \n"
749                                 "[\\p{L}]{300}; \n"
750                                 "[\\p{N}]{400}; \n"
751                                 "[0-5]{500}; \n"
752                                  "!.*;\n", -1, US_INV);
753     UnicodeString testString1  = "Aapz5?";
754     int32_t  statusVals[10];
755     int32_t  numStatuses;
756     int32_t  pos;
757
758     UErrorCode status=U_ZERO_ERROR;
759     UParseError    parseError;
760
761     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
762     if (U_FAILURE(status)) {
763         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
764     } else {
765         bi->setText(testString1);
766
767         // A
768         pos = bi->next();
769         TEST_ASSERT(pos==1);
770         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
771         TEST_ASSERT_SUCCESS(status);
772         TEST_ASSERT(numStatuses == 2);
773         TEST_ASSERT(statusVals[0] == 100);
774         TEST_ASSERT(statusVals[1] == 300);
775
776         // a
777         pos = bi->next();
778         TEST_ASSERT(pos==2);
779         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780         TEST_ASSERT_SUCCESS(status);
781         TEST_ASSERT(numStatuses == 2);
782         TEST_ASSERT(statusVals[0] == 200);
783         TEST_ASSERT(statusVals[1] == 300);
784
785         // p
786         pos = bi->next();
787         TEST_ASSERT(pos==3);
788         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789         TEST_ASSERT_SUCCESS(status);
790         TEST_ASSERT(numStatuses == 2);
791         TEST_ASSERT(statusVals[0] == 200);
792         TEST_ASSERT(statusVals[1] == 300);
793
794         // z
795         pos = bi->next();
796         TEST_ASSERT(pos==4);
797         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798         TEST_ASSERT_SUCCESS(status);
799         TEST_ASSERT(numStatuses == 1);
800         TEST_ASSERT(statusVals[0] == 300);
801
802         // 5
803         pos = bi->next();
804         TEST_ASSERT(pos==5);
805         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
806         TEST_ASSERT_SUCCESS(status);
807         TEST_ASSERT(numStatuses == 2);
808         TEST_ASSERT(statusVals[0] == 400);
809         TEST_ASSERT(statusVals[1] == 500);
810
811         // ?
812         pos = bi->next();
813         TEST_ASSERT(pos==6);
814         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815         TEST_ASSERT_SUCCESS(status);
816         TEST_ASSERT(numStatuses == 1);
817         TEST_ASSERT(statusVals[0] == 0);
818
819         //
820         //  Check buffer overflow error handling.   Char == A
821         //
822         bi->first();
823         pos = bi->next();
824         TEST_ASSERT(pos==1);
825         memset(statusVals, -1, sizeof(statusVals));
826         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
827         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828         TEST_ASSERT(numStatuses == 2);
829         TEST_ASSERT(statusVals[0] == -1);
830
831         status = U_ZERO_ERROR;
832         memset(statusVals, -1, sizeof(statusVals));
833         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
834         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
835         TEST_ASSERT(numStatuses == 2);
836         TEST_ASSERT(statusVals[0] == 100);
837         TEST_ASSERT(statusVals[1] == -1);
838
839         status = U_ZERO_ERROR;
840         memset(statusVals, -1, sizeof(statusVals));
841         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
842         TEST_ASSERT_SUCCESS(status);
843         TEST_ASSERT(numStatuses == 2);
844         TEST_ASSERT(statusVals[0] == 100);
845         TEST_ASSERT(statusVals[1] == 300);
846         TEST_ASSERT(statusVals[2] == -1);
847     }
848     delete bi;
849
850}
851
852//
853//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
854//                               $variable reference
855void RBBIAPITest::TestBug2190() {
856     UnicodeString rulesString1 = "$aaa = abcd;\n"
857                                  "$bbb = $aaa;\n"
858                                  "$bbb;\n";
859     UnicodeString testString1  = "abcdabcd";
860                                // 01234567890
861     int32_t bounds1[] = {0, 4, 8};
862     UErrorCode status=U_ZERO_ERROR;
863     UParseError    parseError;
864
865     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
866     if(U_FAILURE(status)) {
867         dataerrln("Fail : in construction - %s", u_errorName(status));
868     } else {
869         bi->setText(testString1);
870         doBoundaryTest(*bi, testString1, bounds1);
871     }
872     delete bi;
873}
874
875
876void RBBIAPITest::TestRegistration() {
877#if !UCONFIG_NO_SERVICE
878    UErrorCode status = U_ZERO_ERROR;
879    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
880    // ok to not delete these if we exit because of error?
881    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
882    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
884
885    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
886        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
887
888        delete ja_word;
889        delete ja_char;
890        delete root_word;
891        delete root_char;
892
893        return;
894    }
895
896    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
897    {
898#if 0 // With a dictionary based word breaking, ja_word is identical to root.
899        if (ja_word && *ja_word == *root_word) {
900            errln("japan not different from root");
901        }
902#endif
903    }
904
905    {
906        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
907        UBool fail = TRUE;
908        if(result){
909            fail = *result != *ja_word;
910        }
911        delete result;
912        if (fail) {
913            errln("bad result for xx_XX/word");
914        }
915    }
916
917    {
918        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
919        UBool fail = TRUE;
920        if(result){
921            fail = *result != *ja_char;
922        }
923        delete result;
924        if (fail) {
925            errln("bad result for ja_JP/char");
926        }
927    }
928
929    {
930        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
931        UBool fail = TRUE;
932        if(result){
933            fail = *result != *root_char;
934        }
935        delete result;
936        if (fail) {
937            errln("bad result for xx_XX/char");
938        }
939    }
940
941    {
942        StringEnumeration* avail = BreakIterator::getAvailableLocales();
943        UBool found = FALSE;
944        const UnicodeString* p;
945        while ((p = avail->snext(status))) {
946            if (p->compare("xx") == 0) {
947                found = TRUE;
948                break;
949            }
950        }
951        delete avail;
952        if (!found) {
953            errln("did not find test locale");
954        }
955    }
956
957    {
958        UBool unreg = BreakIterator::unregister(key, status);
959        if (!unreg) {
960            errln("unable to unregister");
961        }
962    }
963
964    {
965        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
966        BreakIterator* root = BreakIterator::createWordInstance("", status);
967        UBool fail = TRUE;
968        if(root){
969          fail = *root != *result;
970        }
971        delete root;
972        delete result;
973        if (fail) {
974            errln("did not get root break");
975        }
976    }
977
978    {
979        StringEnumeration* avail = BreakIterator::getAvailableLocales();
980        UBool found = FALSE;
981        const UnicodeString* p;
982        while ((p = avail->snext(status))) {
983            if (p->compare("xx") == 0) {
984                found = TRUE;
985                break;
986            }
987        }
988        delete avail;
989        if (found) {
990            errln("found test locale");
991        }
992    }
993
994    {
995        int32_t count;
996        UBool   foundLocale = FALSE;
997        const Locale *avail = BreakIterator::getAvailableLocales(count);
998        for (int i=0; i<count; i++) {
999            if (avail[i] == Locale::getEnglish()) {
1000                foundLocale = TRUE;
1001                break;
1002            }
1003        }
1004        if (foundLocale == FALSE) {
1005            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1006        }
1007    }
1008
1009
1010    // ja_word was adopted by factory
1011    delete ja_char;
1012    delete root_word;
1013    delete root_char;
1014#endif
1015}
1016
1017void RBBIAPITest::RoundtripRule(const char *dataFile) {
1018    UErrorCode status = U_ZERO_ERROR;
1019    UParseError parseError;
1020    parseError.line = 0;
1021    parseError.offset = 0;
1022    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1023    uint32_t length;
1024    const UChar *builtSource;
1025    const uint8_t *rbbiRules;
1026    const uint8_t *builtRules;
1027
1028    if (U_FAILURE(status)) {
1029        errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1030        return;
1031    }
1032
1033    builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1034    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1035    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1036    if (U_FAILURE(status)) {
1037        errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1038                u_errorName(status), parseError.line, parseError.offset);
1039        return;
1040    };
1041    rbbiRules = brkItr->getBinaryRules(length);
1042    logln("Comparing \"%s\" len=%d", dataFile, length);
1043    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1044        errln("Built rules and rebuilt rules are different %s", dataFile);
1045        return;
1046    }
1047    delete brkItr;
1048}
1049
1050void RBBIAPITest::TestRoundtripRules() {
1051    RoundtripRule("word");
1052    RoundtripRule("title");
1053    RoundtripRule("sent");
1054    RoundtripRule("line");
1055    RoundtripRule("char");
1056    if (!quick) {
1057        RoundtripRule("word_POSIX");
1058    }
1059}
1060
1061// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1062// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1063// This is just a sanity check, not a thorough test (e.g. we don't check that the
1064// first delete actually frees rulesCopy).
1065void RBBIAPITest::TestCreateFromRBBIData() {
1066    // Get some handy RBBIData
1067    const char *brkName = "word"; // or "sent", "line", "char", etc.
1068    UErrorCode status = U_ZERO_ERROR;
1069    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
1070    if ( U_SUCCESS(status) ) {
1071        const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
1072        uint32_t length = builtRules->fLength;
1073        RBBIWithProtectedFunctions * brkItr;
1074
1075        // Try the memory-adopting constructor, need to copy the data first
1076        RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1077        if ( rulesCopy ) {
1078            uprv_memcpy( rulesCopy, builtRules, length );
1079
1080            brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1081            if ( U_SUCCESS(status) ) {
1082                delete brkItr; // this should free rulesCopy
1083            } else {
1084                errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1085                status = U_ZERO_ERROR;// reset for the next test
1086                uprv_free( rulesCopy );
1087            }
1088        }
1089
1090        // Now try the non-adopting constructor
1091        brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1092        if ( U_SUCCESS(status) ) {
1093            delete brkItr; // this should NOT attempt to free builtRules
1094            if (builtRules->fLength != length) { // sanity check
1095                errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1096            }
1097        } else {
1098            errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1099        }
1100    }
1101
1102    // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
1103    //
1104    status = U_ZERO_ERROR;
1105    RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
1106    if (rb == NULL || U_FAILURE(status)) {
1107        dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
1108    } else {
1109        uint32_t length;
1110        const uint8_t *rules = rb->getBinaryRules(length);
1111        RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
1112        TEST_ASSERT_SUCCESS(status);
1113        TEST_ASSERT(*rb == *rb2);
1114        UnicodeString words = "one two three ";
1115        rb2->setText(words);
1116        int wordCounter = 0;
1117        while (rb2->next() != UBRK_DONE) {
1118            wordCounter++;
1119        }
1120        TEST_ASSERT(wordCounter == 6);
1121
1122        status = U_ZERO_ERROR;
1123        RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
1124        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1125
1126        delete rb;
1127        delete rb2;
1128        delete rb3;
1129    }
1130}
1131
1132
1133void RBBIAPITest::TestRefreshInputText() {
1134    /*
1135     *  RefreshInput changes out the input of a Break Iterator without
1136     *    changing anything else in the iterator's state.  Used with Java JNI,
1137     *    when Java moves the underlying string storage.   This test
1138     *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1139     *    The right set of boundaries should still be found.
1140     */
1141    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1142    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1143    UErrorCode status = U_ZERO_ERROR;
1144    UText ut1 = UTEXT_INITIALIZER;
1145    UText ut2 = UTEXT_INITIALIZER;
1146    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1147    TEST_ASSERT_SUCCESS(status);
1148
1149    utext_openUChars(&ut1, testStr, -1, &status);
1150    TEST_ASSERT_SUCCESS(status);
1151
1152    if (U_SUCCESS(status)) {
1153        bi->setText(&ut1, status);
1154        TEST_ASSERT_SUCCESS(status);
1155
1156        /* Line boundaries will occur before each letter in the original string */
1157        TEST_ASSERT(1 == bi->next());
1158        TEST_ASSERT(3 == bi->next());
1159
1160        /* Move the string, kill the original string.  */
1161        u_strcpy(movedStr, testStr);
1162        u_memset(testStr, 0x20, u_strlen(testStr));
1163        utext_openUChars(&ut2, movedStr, -1, &status);
1164        TEST_ASSERT_SUCCESS(status);
1165        RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1166        TEST_ASSERT_SUCCESS(status);
1167        TEST_ASSERT(bi == returnedBI);
1168
1169        /* Find the following matches, now working in the moved string. */
1170        TEST_ASSERT(5 == bi->next());
1171        TEST_ASSERT(7 == bi->next());
1172        TEST_ASSERT(8 == bi->next());
1173        TEST_ASSERT(UBRK_DONE == bi->next());
1174
1175        utext_close(&ut1);
1176        utext_close(&ut2);
1177    }
1178    delete bi;
1179
1180}
1181
1182#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1183static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1184  static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1185  it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1186
1187  int32_t *pos = new int32_t[ustr.length()];
1188  int32_t posCount = 0;
1189
1190  // calculate breaks up front, so we can print out
1191  // sans any debugging
1192  for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1193    pos[posCount++] = n;
1194    if(posCount>=ustr.length()) {
1195      it.errln("brk count exceeds string length!");
1196      return;
1197    }
1198  }
1199  UnicodeString out;
1200  out.append((UChar)CHSTR);
1201  int32_t prev = 0;
1202  for(int32_t i=0;i<posCount;i++) {
1203    int32_t n=pos[i];
1204    out.append(ustr.tempSubString(prev,n-prev));
1205    out.append((UChar)PILCROW);
1206    prev=n;
1207  }
1208  out.append(ustr.tempSubString(prev,ustr.length()-prev));
1209  out.append((UChar)CHEND);
1210  it.logln(out);
1211
1212  out.remove();
1213  for(int32_t i=0;i<posCount;i++) {
1214    char tmp[100];
1215    sprintf(tmp,"%d ",pos[i]);
1216    out.append(UnicodeString(tmp));
1217  }
1218  it.logln(out);
1219  delete [] pos;
1220}
1221#endif
1222
1223void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1224#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1225  UErrorCode status = U_ZERO_ERROR;
1226  LocalPointer<FilteredBreakIteratorBuilder> builder;
1227  LocalPointer<BreakIterator> baseBI;
1228  LocalPointer<BreakIterator> filteredBI;
1229
1230  const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1231  const UnicodeString ABBR_MR("Mr.");
1232  const UnicodeString ABBR_CAPT("Capt.");
1233
1234  {
1235    logln("Constructing empty builder\n");
1236    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1237    TEST_ASSERT_SUCCESS(status);
1238
1239    logln("Constructing base BI\n");
1240    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1241    TEST_ASSERT_SUCCESS(status);
1242
1243    logln("Building new BI\n");
1244    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1245    TEST_ASSERT_SUCCESS(status);
1246
1247    logln("Testing:");
1248    filteredBI->setText(text);
1249    TEST_ASSERT(20 == filteredBI->next()); // Mr.
1250    TEST_ASSERT(84 == filteredBI->next()); // recovered.
1251    TEST_ASSERT(90 == filteredBI->next()); // Capt.
1252    TEST_ASSERT(181 == filteredBI->next()); // Mr.
1253    TEST_ASSERT(278 == filteredBI->next()); // charge.
1254    filteredBI->first();
1255    prtbrks(filteredBI.getAlias(), text, *this);
1256  }
1257
1258  {
1259    logln("Constructing empty builder\n");
1260    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1261    TEST_ASSERT_SUCCESS(status);
1262
1263    logln("Adding Mr. as an exception\n");
1264    TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1265    TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1266    TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1267    TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1268    TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1269    TEST_ASSERT_SUCCESS(status);
1270
1271    logln("Constructing base BI\n");
1272    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1273    TEST_ASSERT_SUCCESS(status);
1274
1275    logln("Building new BI\n");
1276    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1277    TEST_ASSERT_SUCCESS(status);
1278
1279    logln("Testing:");
1280    filteredBI->setText(text);
1281    TEST_ASSERT(84 == filteredBI->next());
1282    TEST_ASSERT(90 == filteredBI->next());// Capt.
1283    TEST_ASSERT(278 == filteredBI->next());
1284    filteredBI->first();
1285    prtbrks(filteredBI.getAlias(), text, *this);
1286  }
1287
1288
1289  {
1290    logln("Constructing empty builder\n");
1291    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1292    TEST_ASSERT_SUCCESS(status);
1293
1294    logln("Adding Mr. and Capt as an exception\n");
1295    TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1296    TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1297    TEST_ASSERT_SUCCESS(status);
1298
1299    logln("Constructing base BI\n");
1300    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1301    TEST_ASSERT_SUCCESS(status);
1302
1303    logln("Building new BI\n");
1304    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1305    TEST_ASSERT_SUCCESS(status);
1306
1307    logln("Testing:");
1308    filteredBI->setText(text);
1309    TEST_ASSERT(84 == filteredBI->next());
1310    TEST_ASSERT(278 == filteredBI->next());
1311    filteredBI->first();
1312    prtbrks(filteredBI.getAlias(), text, *this);
1313  }
1314
1315
1316  {
1317    logln("Constructing English builder\n");
1318    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1319    TEST_ASSERT_SUCCESS(status);
1320
1321    logln("Constructing base BI\n");
1322    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1323    TEST_ASSERT_SUCCESS(status);
1324
1325    logln("unsuppressing 'Capt'");
1326    TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1327
1328    logln("Building new BI\n");
1329    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1330    TEST_ASSERT_SUCCESS(status);
1331
1332    if(filteredBI.isValid()) {
1333      logln("Testing:");
1334      filteredBI->setText(text);
1335      TEST_ASSERT(84 == filteredBI->next());
1336      TEST_ASSERT(90 == filteredBI->next());
1337      TEST_ASSERT(278 == filteredBI->next());
1338      filteredBI->first();
1339      prtbrks(filteredBI.getAlias(), text, *this);
1340    }
1341  }
1342
1343
1344  {
1345    logln("Constructing English builder\n");
1346    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1347    TEST_ASSERT_SUCCESS(status);
1348
1349    logln("Constructing base BI\n");
1350    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1351    TEST_ASSERT_SUCCESS(status);
1352
1353    logln("Building new BI\n");
1354    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1355    TEST_ASSERT_SUCCESS(status);
1356
1357    if(filteredBI.isValid()) {
1358      logln("Testing:");
1359      filteredBI->setText(text);
1360      TEST_ASSERT(84 == filteredBI->next());
1361      TEST_ASSERT(278 == filteredBI->next());
1362      filteredBI->first();
1363      prtbrks(filteredBI.getAlias(), text, *this);
1364    }
1365  }
1366
1367#if 0
1368  // reenable once french is in
1369  {
1370    logln("Constructing French builder");
1371    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1372    TEST_ASSERT_SUCCESS(status);
1373
1374    logln("Constructing base BI\n");
1375    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1376    TEST_ASSERT_SUCCESS(status);
1377
1378    logln("Building new BI\n");
1379    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1380    TEST_ASSERT_SUCCESS(status);
1381
1382    if(filteredBI.isValid()) {
1383      logln("Testing:");
1384      filteredBI->setText(text);
1385      TEST_ASSERT(20 == filteredBI->next());
1386      TEST_ASSERT(84 == filteredBI->next());
1387      filteredBI->first();
1388      prtbrks(filteredBI.getAlias(), text, *this);
1389    }
1390  }
1391#endif
1392
1393#else
1394  logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1395#endif
1396}
1397
1398//---------------------------------------------
1399// runIndexedTest
1400//---------------------------------------------
1401
1402void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1403{
1404    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1405    switch (index) {
1406     //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1407#if !UCONFIG_NO_FILE_IO
1408        case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1409        case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1410        case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1411        case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1412        case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1413#else
1414        case  0: case  1: case  2: case  3: case  4: name = "skip"; break;
1415#endif
1416        case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1417        case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1418        case  7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1419        case  8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1420#if !UCONFIG_NO_FILE_IO
1421        case  9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1422        case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1423        case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1424        case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1425        case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1426#else
1427        case  9: case 10: case 11: case 12: case 13: name = "skip"; break;
1428#endif
1429        case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
1430
1431#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1432    case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
1433#else
1434    case 15: name="skip"; break;
1435#endif
1436        default: name = ""; break; // needed to end loop
1437    }
1438}
1439
1440//---------------------------------------------
1441//Internal subroutines
1442//---------------------------------------------
1443
1444void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1445     logln((UnicodeString)"testIsBoundary():");
1446        int32_t p = 0;
1447        UBool isB;
1448        for (int32_t i = 0; i < text.length(); i++) {
1449            isB = bi.isBoundary(i);
1450            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1451
1452            if (i == boundaries[p]) {
1453                if (!isB)
1454                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1455                p++;
1456            }
1457            else {
1458                if (isB)
1459                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1460            }
1461        }
1462}
1463void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1464    UnicodeString selected;
1465    UnicodeString expected=CharsToUnicodeString(expectedString);
1466
1467    if(gotoffset != expectedOffset)
1468         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1469    if(start <= gotoffset){
1470        testString.extractBetween(start, gotoffset, selected);
1471    }
1472    else{
1473        testString.extractBetween(gotoffset, start, selected);
1474    }
1475    if(selected.compare(expected) != 0)
1476         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1477    else
1478        logln(prettify("****selected \"" + selected + "\""));
1479}
1480
1481//---------------------------------------------
1482//RBBIWithProtectedFunctions class functions
1483//---------------------------------------------
1484
1485RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1486    : RuleBasedBreakIterator(data, status)
1487{
1488}
1489
1490RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1491    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1492{
1493}
1494
1495#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1496