1/* 2***************************************************************************************** 3* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved. 4***************************************************************************************** 5*/ 6 7#include "unicode/utypes.h" 8 9#if !UCONFIG_NO_BREAK_ITERATION 10 11#include "unicode/urbtok.h" 12 13#include "rbtok.h" 14#include "unicode/ustring.h" 15#include "rbbidata.h" 16#include "cmemory.h" 17#include "ucmndata.h" 18 19U_NAMESPACE_USE 20 21U_CAPI UBreakIterator* U_EXPORT2 22urbtok_openRules(const UChar *rules, 23 int32_t rulesLength, 24 UParseError *parseErr, 25 UErrorCode *status) 26{ 27 if (status == NULL || U_FAILURE(*status)){ 28 return 0; 29 } 30 31 BreakIterator *result = 0; 32 UnicodeString ruleString(rules, rulesLength); 33 result = new RuleBasedTokenizer(ruleString, *parseErr, *status); 34 if(U_FAILURE(*status)) { 35 return 0; 36 } 37 38 UBreakIterator *uBI = (UBreakIterator *)result; 39 return uBI; 40} 41 42U_CAPI UBreakIterator* U_EXPORT2 43urbtok_openBinaryRules(const uint8_t *rules, 44 UErrorCode *status) 45{ 46 if (status == NULL || U_FAILURE(*status)){ 47 return 0; 48 } 49 50 uint32_t length = ((const RBBIDataHeader *)rules)->fLength; 51 uint8_t *ruleCopy = (uint8_t *) uprv_malloc(length); 52 if (ruleCopy == 0) 53 { 54 *status = U_MEMORY_ALLOCATION_ERROR; 55 return 0; 56 } 57 // Copy the rules so they can be adopted by the tokenizer 58 uprv_memcpy(ruleCopy, rules, length); 59 BreakIterator *result = 0; 60 result = new RuleBasedTokenizer(ruleCopy, *status); 61 if(U_FAILURE(*status)) { 62 return 0; 63 } 64 65 UBreakIterator *uBI = (UBreakIterator *)result; 66 return uBI; 67} 68 69U_CAPI UBreakIterator* U_EXPORT2 70urbtok_openBinaryRulesNoCopy(const uint8_t *rules, 71 UErrorCode *status) 72{ 73 if (status == NULL || U_FAILURE(*status)){ 74 return 0; 75 } 76 77 BreakIterator *result = 0; 78 result = new RuleBasedTokenizer(rules, RuleBasedTokenizer::kDontAdopt, *status); 79 if(U_FAILURE(*status)) { 80 return 0; 81 } 82 83 UBreakIterator *uBI = (UBreakIterator *)result; 84 return uBI; 85} 86 87U_CAPI uint32_t U_EXPORT2 88urbtok_getBinaryRules(UBreakIterator *bi, 89 uint8_t *buffer, 90 uint32_t buffSize, 91 UErrorCode *status) 92{ 93 if (status == NULL || U_FAILURE(*status)){ 94 return 0; 95 } 96 97 uint32_t length; 98 const uint8_t *rules = ((RuleBasedBreakIterator *)bi)->getBinaryRules(length); 99 if (buffer != 0) 100 { 101 if (length > buffSize) 102 { 103 *status = U_BUFFER_OVERFLOW_ERROR; 104 } 105 else 106 { 107 uprv_memcpy(buffer, rules, length); 108 } 109 } 110 return length; 111} 112 113U_CAPI int32_t U_EXPORT2 114urbtok_tokenize(UBreakIterator *bi, 115 int32_t maxTokens, 116 RuleBasedTokenRange *outTokens, 117 unsigned long *outTokenFlags) 118{ 119 return ((RuleBasedTokenizer *)bi)->tokenize(maxTokens, outTokens, outTokenFlags); 120} 121 122U_CAPI void U_EXPORT2 123urbtok_swapBinaryRules(const uint8_t *rules, 124 uint8_t *buffer, 125 UBool inIsBigEndian, 126 UBool outIsBigEndian, 127 UErrorCode *status) 128{ 129 DataHeader *outH = NULL; 130 int32_t outLength = 0; 131 UDataSwapper *ds = udata_openSwapper(inIsBigEndian, U_CHARSET_FAMILY, outIsBigEndian, U_CHARSET_FAMILY, status); 132 133 if (status == NULL || U_FAILURE(*status)){ 134 return; 135 } 136 137 uint32_t length = ds->readUInt32(((const RBBIDataHeader *)rules)->fLength); 138 uint32_t totalLength = sizeof(DataHeader) + length; 139 140 DataHeader *dh = (DataHeader *)uprv_malloc(totalLength); 141 if (dh == 0) 142 { 143 *status = U_MEMORY_ALLOCATION_ERROR; 144 goto closeSwapper; 145 } 146 outH = (DataHeader *)uprv_malloc(totalLength); 147 if (outH == 0) 148 { 149 *status = U_MEMORY_ALLOCATION_ERROR; 150 uprv_free(dh); 151 goto closeSwapper; 152 } 153 dh->dataHeader.headerSize = ds->readUInt16(sizeof(DataHeader)); 154 dh->dataHeader.magic1 = 0xda; 155 dh->dataHeader.magic2 = 0x27; 156 dh->info.size = ds->readUInt16(sizeof(UDataInfo)); 157 dh->info.reservedWord = 0; 158 dh->info.isBigEndian = inIsBigEndian; 159 dh->info.charsetFamily = U_CHARSET_FAMILY; 160 dh->info.sizeofUChar = U_SIZEOF_UCHAR; 161 dh->info.reservedByte = 0; 162 uprv_memcpy(dh->info.dataFormat, "Brk ", sizeof(dh->info.dataFormat)); 163 uprv_memcpy(dh->info.formatVersion, ((const RBBIDataHeader *)rules)->fFormatVersion, sizeof(dh->info.formatVersion)); 164 dh->info.dataVersion[0] = 4; // Unicode version 165 dh->info.dataVersion[1] = 1; 166 dh->info.dataVersion[2] = 0; 167 dh->info.dataVersion[3] = 0; 168 uprv_memcpy(((uint8_t*)dh) + sizeof(DataHeader), rules, length); 169 170 outLength = ubrk_swap(ds, dh, totalLength, outH, status); 171 if (U_SUCCESS(*status) && outLength != totalLength) // something went horribly wrong 172 { 173 *status = U_INVALID_FORMAT_ERROR; 174 } 175 176 if (U_SUCCESS(*status)) 177 { 178 uprv_memcpy(buffer, ((uint8_t *)outH) + sizeof(DataHeader), length); 179 } 180 uprv_free(outH); 181 uprv_free(dh); 182 183closeSwapper: 184 udata_closeSwapper(ds); 185} 186 187 188#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 189