1/*
2 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Distributed under the terms of the MIT License.
4 */
5#ifndef UTF8_CHAR_H
6#define UTF8_CHAR_H
7
8#include <ctype.h>
9#include <string.h>
10
11#include <UnicodeChar.h>
12
13
14struct UTF8Char {
15	char	bytes[4];
16
17	UTF8Char()
18	{
19		bytes[0] = 0;
20	}
21
22	UTF8Char(char c)
23	{
24		bytes[0] = c;
25	}
26
27	UTF8Char(const char* c)
28	{
29		SetTo(c, ByteCount(*c));
30	}
31
32	UTF8Char(const char* c, int32 count)
33	{
34		SetTo(c, count);
35	}
36
37	void SetTo(const char* c, int32 count)
38	{
39		bytes[0] = c[0];
40		if (count > 1) {
41			bytes[1] = c[1];
42			if (count > 2) {
43				bytes[2] = c[2];
44				if (count > 3)
45					bytes[3] = c[3];
46			}
47		}
48	}
49
50	static int32 ByteCount(char firstChar)
51	{
52		// Note, this does not recognize invalid chars
53		uchar c = firstChar;
54		if (c < 0x80)
55			return 1;
56		if (c < 0xe0)
57			return 2;
58		return c < 0xf0 ? 3 : 4;
59	}
60
61	int32 ByteCount() const
62	{
63		return ByteCount(bytes[0]);
64	}
65
66	bool IsFullWidth() const
67	{
68		switch (BUnicodeChar::EastAsianWidth(BUnicodeChar::FromUTF8(bytes))) {
69			case B_UNICODE_EA_FULLWIDTH:
70			case B_UNICODE_EA_WIDE:
71				return true;
72			default:
73				break;
74		}
75		return false;
76	}
77
78	bool IsSpace() const
79	{
80		return BUnicodeChar::IsSpace(BUnicodeChar::FromUTF8(bytes));
81	}
82
83	bool IsAlNum() const
84	{
85		return BUnicodeChar::IsAlNum(BUnicodeChar::FromUTF8(bytes));
86	}
87
88	UTF8Char ToLower() const
89	{
90		uint32 c = BUnicodeChar::ToLower(BUnicodeChar::FromUTF8(bytes));
91
92		UTF8Char character;
93		char* utf8 = character.bytes;
94		BUnicodeChar::ToUTF8(c, &utf8);
95
96		return character;
97	}
98
99	bool operator==(const UTF8Char& other) const
100	{
101		int32 byteCount = ByteCount();
102		bool equals = bytes[0] == other.bytes[0];
103		if (byteCount > 1 && equals) {
104			equals = bytes[1] == other.bytes[1];
105			if (byteCount > 2 && equals) {
106				equals = bytes[2] == other.bytes[2];
107				if (byteCount > 3 && equals)
108					equals = bytes[3] == other.bytes[3];
109			}
110		}
111		return equals;
112	}
113
114	bool operator!=(const UTF8Char& other) const
115	{
116		return !(*this == other);
117	}
118};
119
120
121#endif	// UTF8_CHAR_H
122