1/* 2 * Copyright 2016, Haiku, inc. 3 * Distributed under terms of the MIT license. 4 */ 5 6 7#include "TextEncoding.h" 8 9#include <unicode/ucnv.h> 10#include <unicode/ucsdet.h> 11 12#include <algorithm> 13 14 15namespace BPrivate { 16 17 18BTextEncoding::BTextEncoding(BString name) 19 : 20 fName(name), 21 fUtf8Converter(NULL), 22 fConverter(NULL) 23{ 24} 25 26 27BTextEncoding::BTextEncoding(const char* data, size_t length) 28 : 29 fUtf8Converter(NULL), 30 fConverter(NULL) 31{ 32 UErrorCode error = U_ZERO_ERROR; 33 34 UCharsetDetector* detector = ucsdet_open(&error); 35 ucsdet_setText(detector, data, length, &error); 36 const UCharsetMatch* encoding = ucsdet_detect(detector, &error); 37 38 fName = ucsdet_getName(encoding, &error); 39 ucsdet_close(detector); 40} 41 42 43BTextEncoding::~BTextEncoding() 44{ 45 if (fUtf8Converter != NULL) 46 ucnv_close(fUtf8Converter); 47 48 if (fConverter != NULL) 49 ucnv_close(fConverter); 50} 51 52 53status_t 54BTextEncoding::InitCheck() 55{ 56 if (fName.IsEmpty()) 57 return B_NO_INIT; 58 else 59 return B_OK; 60} 61 62 63status_t 64BTextEncoding::Decode(const char* input, size_t& inputLength, char* output, 65 size_t& outputLength) 66{ 67 const char* base = input; 68 char* target = output; 69 70 // Optimize the easy case. 71 // Note: we don't check the input to be valid UTF-8 when doing that. 72 if (fName == "UTF-8") { 73 outputLength = std::min(inputLength, outputLength); 74 inputLength = outputLength; 75 memcpy(output, input, inputLength); 76 return B_OK; 77 } 78 79 UErrorCode error = U_ZERO_ERROR; 80 81 if (fUtf8Converter == NULL) 82 fUtf8Converter = ucnv_open("UTF-8", &error); 83 84 if (fConverter == NULL) 85 fConverter = ucnv_open(fName.String(), &error); 86 87 ucnv_convertEx(fUtf8Converter, fConverter, &target, output + outputLength, 88 &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE, 89 &error); 90 91 // inputLength is set to the number of bytes consumed. We may not use all of 92 // the input data (for example if it is cut in the middle of an utf-8 char). 93 inputLength = base - input; 94 outputLength = target - output; 95 96 if (!U_SUCCESS(error)) 97 return B_ERROR; 98 99 return B_OK; 100} 101 102 103status_t 104BTextEncoding::Encode(const char* input, size_t& inputLength, char* output, 105 size_t& outputLength) 106{ 107 const char* base = input; 108 char* target = output; 109 110 // Optimize the easy case. 111 // Note: we don't check the input to be valid UTF-8 when doing that. 112 if (fName == "UTF-8") { 113 outputLength = std::min(inputLength, outputLength); 114 inputLength = outputLength; 115 memcpy(output, input, inputLength); 116 return B_OK; 117 } 118 119 UErrorCode error = U_ZERO_ERROR; 120 121 if (fUtf8Converter == NULL) 122 fUtf8Converter = ucnv_open("UTF-8", &error); 123 124 if (fConverter == NULL) 125 fConverter = ucnv_open(fName.String(), &error); 126 127 ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength, 128 &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE, 129 &error); 130 131 // inputLength is set to the number of bytes consumed. We may not use all of 132 // the input data (for example if it is cut in the middle of an utf-8 char). 133 inputLength = base - input; 134 outputLength = target - output; 135 136 if (!U_SUCCESS(error)) 137 return B_ERROR; 138 139 return B_OK; 140} 141 142 143status_t 144BTextEncoding::Flush(char* output, size_t& outputLength) 145{ 146 char* target = output; 147 148 if (fName == "UTF-8") 149 return B_OK; 150 151 if (fUtf8Converter == NULL || fConverter == NULL) 152 return B_NO_INIT; 153 154 UErrorCode error = U_ZERO_ERROR; 155 156 ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength, 157 NULL, NULL, NULL, NULL, NULL, NULL, FALSE, TRUE, 158 &error); 159 160 if (!U_SUCCESS(error)) 161 return B_ERROR; 162 163 return B_OK; 164} 165 166 167BString 168BTextEncoding::GetName() 169{ 170 return fName; 171} 172 173 174}; 175