1/*
2*****************************************************************************************
3* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
4*****************************************************************************************
5*/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_BREAK_ITERATION
10
11#include "unicode/urbtok.h"
12
13#include "rbtok.h"
14#include "unicode/ustring.h"
15#include "rbbidata.h"
16#include "cmemory.h"
17#include "ucmndata.h"
18
19U_NAMESPACE_USE
20
21U_CAPI UBreakIterator* U_EXPORT2
22urbtok_openRules(const UChar     *rules,
23               int32_t         rulesLength,
24               UParseError     *parseErr,
25               UErrorCode      *status)
26{
27    if (status == NULL || U_FAILURE(*status)){
28        return 0;
29    }
30
31    BreakIterator *result = 0;
32    UnicodeString ruleString(rules, rulesLength);
33    result = new RuleBasedTokenizer(ruleString, *parseErr, *status);
34    if(U_FAILURE(*status)) {
35        return 0;
36    }
37
38    UBreakIterator *uBI = (UBreakIterator *)result;
39    return uBI;
40}
41
42U_CAPI UBreakIterator* U_EXPORT2
43urbtok_openBinaryRules(const uint8_t *rules,
44               UErrorCode      *status)
45{
46    if (status == NULL || U_FAILURE(*status)){
47        return 0;
48    }
49
50    uint32_t length = ((const RBBIDataHeader *)rules)->fLength;
51    uint8_t *ruleCopy = (uint8_t *) uprv_malloc(length);
52    if (ruleCopy == 0)
53    {
54        *status = U_MEMORY_ALLOCATION_ERROR;
55        return 0;
56    }
57    // Copy the rules so they can be adopted by the tokenizer
58    uprv_memcpy(ruleCopy, rules, length);
59    BreakIterator *result = 0;
60    result = new RuleBasedTokenizer(ruleCopy, *status);
61    if(U_FAILURE(*status)) {
62        return 0;
63    }
64
65    UBreakIterator *uBI = (UBreakIterator *)result;
66    return uBI;
67}
68
69U_CAPI UBreakIterator* U_EXPORT2
70urbtok_openBinaryRulesNoCopy(const uint8_t *rules,
71               UErrorCode      *status)
72{
73    if (status == NULL || U_FAILURE(*status)){
74        return 0;
75    }
76
77    BreakIterator *result = 0;
78    result = new RuleBasedTokenizer(rules, RuleBasedTokenizer::kDontAdopt, *status);
79    if(U_FAILURE(*status)) {
80        return 0;
81    }
82
83    UBreakIterator *uBI = (UBreakIterator *)result;
84    return uBI;
85}
86
87U_CAPI uint32_t U_EXPORT2
88urbtok_getBinaryRules(UBreakIterator      *bi,
89                uint8_t             *buffer,
90                uint32_t            buffSize,
91                UErrorCode          *status)
92{
93    if (status == NULL || U_FAILURE(*status)){
94        return 0;
95    }
96
97    uint32_t length;
98    const uint8_t *rules = ((RuleBasedBreakIterator *)bi)->getBinaryRules(length);
99    if (buffer != 0)
100    {
101        if (length > buffSize)
102        {
103            *status = U_BUFFER_OVERFLOW_ERROR;
104        }
105        else
106        {
107            uprv_memcpy(buffer, rules, length);
108        }
109    }
110    return length;
111}
112
113U_CAPI int32_t U_EXPORT2
114urbtok_tokenize(UBreakIterator      *bi,
115               int32_t              maxTokens,
116               RuleBasedTokenRange  *outTokens,
117               unsigned long        *outTokenFlags)
118{
119    return ((RuleBasedTokenizer *)bi)->tokenize(maxTokens, outTokens, outTokenFlags);
120}
121
122U_CAPI void U_EXPORT2
123urbtok_swapBinaryRules(const uint8_t *rules,
124               uint8_t          *buffer,
125               UBool            inIsBigEndian,
126               UBool            outIsBigEndian,
127               UErrorCode       *status)
128{
129    DataHeader *outH = NULL;
130    int32_t outLength = 0;
131    UDataSwapper *ds = udata_openSwapper(inIsBigEndian, U_CHARSET_FAMILY, outIsBigEndian, U_CHARSET_FAMILY, status);
132
133    if (status == NULL || U_FAILURE(*status)){
134        return;
135    }
136
137    uint32_t length = ds->readUInt32(((const RBBIDataHeader *)rules)->fLength);
138    uint32_t totalLength = sizeof(DataHeader) + length;
139
140    DataHeader *dh = (DataHeader *)uprv_malloc(totalLength);
141    if (dh == 0)
142    {
143        *status = U_MEMORY_ALLOCATION_ERROR;
144        goto closeSwapper;
145    }
146    outH = (DataHeader *)uprv_malloc(totalLength);
147    if (outH == 0)
148    {
149        *status = U_MEMORY_ALLOCATION_ERROR;
150        uprv_free(dh);
151        goto closeSwapper;
152    }
153    dh->dataHeader.headerSize = ds->readUInt16(sizeof(DataHeader));
154    dh->dataHeader.magic1 = 0xda;
155    dh->dataHeader.magic2 = 0x27;
156    dh->info.size = ds->readUInt16(sizeof(UDataInfo));
157    dh->info.reservedWord = 0;
158    dh->info.isBigEndian = inIsBigEndian;
159    dh->info.charsetFamily = U_CHARSET_FAMILY;
160    dh->info.sizeofUChar = U_SIZEOF_UCHAR;
161    dh->info.reservedByte = 0;
162    uprv_memcpy(dh->info.dataFormat, "Brk ", sizeof(dh->info.dataFormat));
163    uprv_memcpy(dh->info.formatVersion, ((const RBBIDataHeader *)rules)->fFormatVersion, sizeof(dh->info.formatVersion));
164    dh->info.dataVersion[0] = 4;        // Unicode version
165    dh->info.dataVersion[1] = 1;
166    dh->info.dataVersion[2] = 0;
167    dh->info.dataVersion[3] = 0;
168    uprv_memcpy(((uint8_t*)dh) + sizeof(DataHeader), rules, length);
169
170    outLength = ubrk_swap(ds, dh, totalLength, outH, status);
171    if (U_SUCCESS(*status) && outLength != totalLength)   // something went horribly wrong
172    {
173        *status = U_INVALID_FORMAT_ERROR;
174    }
175
176    if (U_SUCCESS(*status))
177    {
178        uprv_memcpy(buffer, ((uint8_t *)outH) + sizeof(DataHeader), length);
179    }
180    uprv_free(outH);
181    uprv_free(dh);
182
183closeSwapper:
184    udata_closeSwapper(ds);
185}
186
187
188#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
189