1/*
2 * Copyright 2016, Haiku, inc.
3 * Distributed under terms of the MIT license.
4 */
5
6
7#include "TextEncoding.h"
8
9#include <unicode/ucnv.h>
10#include <unicode/ucsdet.h>
11
12#include <algorithm>
13
14
15namespace BPrivate {
16
17
18BTextEncoding::BTextEncoding(BString name)
19	:
20	fName(name),
21	fUtf8Converter(NULL),
22	fConverter(NULL)
23{
24}
25
26
27BTextEncoding::BTextEncoding(const char* data, size_t length)
28	:
29	fUtf8Converter(NULL),
30	fConverter(NULL)
31{
32	UErrorCode error = U_ZERO_ERROR;
33
34	UCharsetDetector* detector = ucsdet_open(&error);
35	ucsdet_setText(detector, data, length, &error);
36	const UCharsetMatch* encoding = ucsdet_detect(detector, &error);
37
38	fName = ucsdet_getName(encoding, &error);
39	ucsdet_close(detector);
40}
41
42
43BTextEncoding::~BTextEncoding()
44{
45	if (fUtf8Converter != NULL)
46		ucnv_close(fUtf8Converter);
47
48	if (fConverter != NULL)
49		ucnv_close(fConverter);
50}
51
52
53status_t
54BTextEncoding::InitCheck()
55{
56	if (fName.IsEmpty())
57		return B_NO_INIT;
58	else
59		return B_OK;
60}
61
62
63status_t
64BTextEncoding::Decode(const char* input, size_t& inputLength, char* output,
65	size_t& outputLength)
66{
67	const char* base = input;
68	char* target = output;
69
70	// Optimize the easy case.
71	// Note: we don't check the input to be valid UTF-8 when doing that.
72	if (fName == "UTF-8") {
73		outputLength = std::min(inputLength, outputLength);
74		inputLength = outputLength;
75		memcpy(output, input, inputLength);
76		return B_OK;
77	}
78
79	UErrorCode error = U_ZERO_ERROR;
80
81	if (fUtf8Converter == NULL)
82		fUtf8Converter = ucnv_open("UTF-8", &error);
83
84	if (fConverter == NULL)
85		fConverter = ucnv_open(fName.String(), &error);
86
87	ucnv_convertEx(fUtf8Converter, fConverter, &target, output + outputLength,
88		&base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
89		&error);
90
91	// inputLength is set to the number of bytes consumed. We may not use all of
92	// the input data (for example if it is cut in the middle of an utf-8 char).
93	inputLength = base - input;
94	outputLength = target - output;
95
96	if (!U_SUCCESS(error))
97		return B_ERROR;
98
99	return B_OK;
100}
101
102
103status_t
104BTextEncoding::Encode(const char* input, size_t& inputLength, char* output,
105	size_t& outputLength)
106{
107	const char* base = input;
108	char* target = output;
109
110	// Optimize the easy case.
111	// Note: we don't check the input to be valid UTF-8 when doing that.
112	if (fName == "UTF-8") {
113		outputLength = std::min(inputLength, outputLength);
114		inputLength = outputLength;
115		memcpy(output, input, inputLength);
116		return B_OK;
117	}
118
119	UErrorCode error = U_ZERO_ERROR;
120
121	if (fUtf8Converter == NULL)
122		fUtf8Converter = ucnv_open("UTF-8", &error);
123
124	if (fConverter == NULL)
125		fConverter = ucnv_open(fName.String(), &error);
126
127	ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
128		&base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
129		&error);
130
131	// inputLength is set to the number of bytes consumed. We may not use all of
132	// the input data (for example if it is cut in the middle of an utf-8 char).
133	inputLength = base - input;
134	outputLength = target - output;
135
136	if (!U_SUCCESS(error))
137		return B_ERROR;
138
139	return B_OK;
140}
141
142
143status_t
144BTextEncoding::Flush(char* output, size_t& outputLength)
145{
146	char* target = output;
147
148	if (fName == "UTF-8")
149		return B_OK;
150
151	if (fUtf8Converter == NULL || fConverter == NULL)
152		return B_NO_INIT;
153
154	UErrorCode error = U_ZERO_ERROR;
155
156	ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
157		NULL, NULL, NULL, NULL, NULL, NULL, FALSE, TRUE,
158		&error);
159
160	if (!U_SUCCESS(error))
161		return B_ERROR;
162
163	return B_OK;
164}
165
166
167BString
168BTextEncoding::GetName()
169{
170	return fName;
171}
172
173
174};
175