1/*****************************************************************
2** encoding.h
3**
4** Copyright 1998 Clark Cooper
5** All rights reserved.
6**
7** This program is free software; you can redistribute it and/or
8** modify it under the same terms as Perl itself.
9*/
10
11#ifndef ENCODING_H
12#define ENCODING_H 1
13
/* Magic number that the magic field of an Encmap_Header should match. */
#define ENCMAP_MAGIC 0xFEEBFACE
15
/*
** PrefixMap: one entry of the prefix vector used by multiple-byte
** encodings.  When a byte represents a prefix, it selects one of these
** entries instead of mapping to a Unicode character.
*/
struct prefixmap {
  /* 1st byte mapped for this prefix */
  unsigned char min;

  /* number of bytes mapped; 0 => 256 */
  unsigned char len;

  /* starting index of the map for this prefix in the overall map */
  unsigned short bmap_start;

  /*
  ** Bitvectors (32 bytes = 256 bits each) indicating whether the byte
  ** being mapped is a prefix (ispfx) or a character (ischar).  If neither
  ** is set, the character is not mapped to Unicode.
  */
  unsigned char ispfx[32];
  unsigned char ischar[32];
};

typedef struct prefixmap PrefixMap;
23
24typedef struct encinf
25{
26  unsigned short	prefixes_size;
27  unsigned short	bytemap_size;
28  int			firstmap[256];
29  PrefixMap		*prefixes;
30  unsigned short	*bytemap;
31} Encinfo;
32
/*
** Encmap_Header: header found at the start of an encoding map file.
** All shorts and ints in the file are in network order.
*/
struct encmaphdr {
  /* should match ENCMAP_MAGIC */
  unsigned int magic;

  /* IANA registered name for the encoding */
  char name[40];

  /* number of PrefixMaps; should be zero for single byte encodings */
  unsigned short pfsize;

  /* number of shorts used for the overall map */
  unsigned short bmsize;

  /*
  ** Indexed by 1st byte: the Unicode scalar encoded by that byte, or -n
  ** where n is the number of bytes such a 1st byte implies, or -1 if the
  ** byte doesn't map to any Unicode character.
  */
  int map[256];
};

typedef struct encmaphdr Encmap_Header;
41
42/*================================================================
43** Structure of Encoding map binary encoding
44**
45** Note that all shorts and ints are in network order,
46** so when packing or unpacking with perl, use 'n' and 'N' respectively.
47** In C, use the htonl family of functions.
48**
49** The basic structure is:
50**
51**	_______________________
52**	|Header (including map expat needs for 1st byte)
53**	|PrefixMap * pfsize
54**	|   This section isn't included for single-byte encodings.
55**	|   For multiple byte encodings, when a byte represents a prefix
56**	|   then it indexes into this vector instead of mapping to a
57**	|   Unicode character. The PrefixMap type is declared above. The
58**	|   ispfx and ischar fields are bitvectors indicating whether
59**	|   the byte being mapped is a prefix or character respectively.
60**	|   If neither is set, then the character is not mapped to Unicode.
61**	|
62**	|   The min field is the 1st byte mapped for this prefix; the
63**	|   len field is the number of bytes mapped; and bmap_start is
64**	|   the starting index of the map for this prefix in the overall
65**	|   map (next section).
66**	|unsigned short * bmsize
67**	|   This section also is omitted for single-byte encodings.
68**	|   Each short is either a Unicode scalar or an index into the
69**	|   PrefixMap vector.
70**
71** The header for these files is declared above as the Encmap_Header type.
72** The magic field is a magic number which should match the ENCMAP_MAGIC
** macro above. The next 40 bytes store the IANA registered name for the
** encoding. The pfsize field holds the number of PrefixMaps, which should
75** be zero for single byte encodings. The bmsize field holds the number of
76** shorts used for the overall map.
77**
** The map field contains either the Unicode scalar encoded by the 1st byte,
** or -n where n (at least 2) is the number of bytes that such a 1st byte
** implies (Expat requires that the number of bytes to encode a character is
** indicated by the 1st byte), or -1 if the byte doesn't map to any Unicode
** character.
82**
83** If the encoding is a multiple byte encoding, then there will be PrefixMap
** and character map sections. The 1st PrefixMap (index 0) covers a range
** of bytes that includes all 1st byte prefixes.
86**
87** Look at convert_to_unicode in Expat.xs to see how this data structure
88** is used.
89*/
90
91#endif  /* ndef ENCODING_H */
92