1#ifndef __UTF8_H__ 2#define __UTF8_H__ 3 4/* utf8.h -- convert characters to/from UTF-8 5 6 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 7 See tidy.h for the copyright notice. 8 9 CVS Info : 10 11 $Author: iccir $ 12 $Date: 2007/01/30 23:46:52 $ 13 $Revision: 1.3 $ 14 15*/ 16 17#include "platform.h" 18#include "buffio.h" 19 20/* UTF-8 encoding/decoding support 21** Does not convert character "codepoints", i.e. to/from 10646. 22*/ 23 24int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes, 25 TidyInputSource* inp, int* count ); 26 27int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf, 28 TidyOutputSink* outp, int* count ); 29 30 31uint TY_(GetUTF8)( ctmbstr str, uint *ch ); 32tmbstr TY_(PutUTF8)( tmbstr buf, uint c ); 33 34#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */ 35#define UNICODE_BOM UNICODE_BOM_BE 36#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */ 37#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */ 38 39 40Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 ); 41Bool TY_(IsHighSurrogate)( tchar ch ); 42Bool TY_(IsLowSurrogate)( tchar ch ); 43 44Bool TY_(IsCombinedChar)( tchar ch ); 45Bool TY_(IsValidCombinedChar)( tchar ch ); 46 47tchar TY_(CombineSurrogatePair)( tchar high, tchar low ); 48Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low ); 49 50 51 52#endif /* __UTF8_H__ */ 53