/* * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. * Distributed under the terms of the MIT License. */ #ifndef UTF8_CHAR_H #define UTF8_CHAR_H #include #include #include struct UTF8Char { char bytes[4]; UTF8Char() { bytes[0] = 0; } UTF8Char(char c) { bytes[0] = c; } UTF8Char(const char* c) { SetTo(c, ByteCount(*c)); } UTF8Char(const char* c, int32 count) { SetTo(c, count); } void SetTo(const char* c, int32 count) { bytes[0] = c[0]; if (count > 1) { bytes[1] = c[1]; if (count > 2) { bytes[2] = c[2]; if (count > 3) bytes[3] = c[3]; } } } static int32 ByteCount(char firstChar) { // Note, this does not recognize invalid chars uchar c = firstChar; if (c < 0x80) return 1; if (c < 0xe0) return 2; return c < 0xf0 ? 3 : 4; } int32 ByteCount() const { return ByteCount(bytes[0]); } bool IsFullWidth() const { switch (BUnicodeChar::EastAsianWidth(BUnicodeChar::FromUTF8(bytes))) { case B_UNICODE_EA_FULLWIDTH: case B_UNICODE_EA_WIDE: return true; default: break; } return false; } bool IsSpace() const { return BUnicodeChar::IsSpace(BUnicodeChar::FromUTF8(bytes)); } bool IsAlNum() const { return BUnicodeChar::IsAlNum(BUnicodeChar::FromUTF8(bytes)); } UTF8Char ToLower() const { uint32 c = BUnicodeChar::ToLower(BUnicodeChar::FromUTF8(bytes)); UTF8Char character; char* utf8 = character.bytes; BUnicodeChar::ToUTF8(c, &utf8); return character; } bool operator==(const UTF8Char& other) const { int32 byteCount = ByteCount(); bool equals = bytes[0] == other.bytes[0]; if (byteCount > 1 && equals) { equals = bytes[1] == other.bytes[1]; if (byteCount > 2 && equals) { equals = bytes[2] == other.bytes[2]; if (byteCount > 3 && equals) equals = bytes[3] == other.bytes[3]; } } return equals; } bool operator!=(const UTF8Char& other) const { return !(*this == other); } }; #endif // UTF8_CHAR_H