1#ifndef __UTF8_H__
2#define __UTF8_H__
3
4/* utf8.h -- convert characters to/from UTF-8
5
6  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7  See tidy.h for the copyright notice.
8
9  CVS Info :
10
11    $Author: iccir $
12    $Date: 2007/01/30 23:46:52 $
13    $Revision: 1.3 $
14
15*/
16
17#include "platform.h"
18#include "buffio.h"
19
20/* UTF-8 encoding/decoding support
21** Does not convert character "codepoints", i.e. to/from 10646.
22*/
23
24int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
25                                TidyInputSource* inp, int* count );
26
27int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
28                                TidyOutputSink* outp, int* count );
29
30
31uint  TY_(GetUTF8)( ctmbstr str, uint *ch );
32tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
33
34#define UNICODE_BOM_BE   0xFEFF   /* big-endian (default) UNICODE BOM */
35#define UNICODE_BOM      UNICODE_BOM_BE
36#define UNICODE_BOM_LE   0xFFFE   /* little-endian UNICODE BOM */
37#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
38
39
40Bool    TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
41Bool    TY_(IsHighSurrogate)( tchar ch );
42Bool    TY_(IsLowSurrogate)( tchar ch );
43
44Bool    TY_(IsCombinedChar)( tchar ch );
45Bool    TY_(IsValidCombinedChar)( tchar ch );
46
47tchar   TY_(CombineSurrogatePair)( tchar high, tchar low );
48Bool    TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low );
49
50
51
52#endif /* __UTF8_H__ */
53