/*****************************************************************
** encoding.h
**
** Copyright 1998 Clark Cooper
** All rights reserved.
**
** This program is free software; you can redistribute it and/or
** modify it under the same terms as Perl itself.
*/

#ifndef ENCODING_H
#define ENCODING_H 1

/* Magic number expected in the `magic` field of an Encmap_Header. */
#define ENCMAP_MAGIC	0xfeebface

/*
** One entry of the prefix vector used by multiple-byte encodings.
** See "Structure of Encoding map binary encoding" below for how the
** entries are laid out in a map file.
*/
typedef struct prefixmap {
  unsigned char	min;		/* 1st byte mapped for this prefix */
  unsigned char len;		/* number of bytes mapped; 0 => 256 */
  unsigned short bmap_start;	/* starting index of this prefix's map
				   in the overall map */
  unsigned char ispfx[32];	/* bitvector: mapped byte is a prefix */
  unsigned char ischar[32];	/* bitvector: mapped byte is a character */
} PrefixMap;

/*
** NOTE(review): this looks like the in-memory form of a loaded encoding
** map (the fields mirror Encmap_Header, plus pointers to the PrefixMap
** and short sections) -- confirm against the loader in Expat.xs.
*/
typedef struct encinf
{
  unsigned short	prefixes_size;	/* presumably element count of
					   `prefixes` -- TODO confirm */
  unsigned short	bytemap_size;	/* presumably element count of
					   `bytemap` -- TODO confirm */
  int			firstmap[256];	/* one entry per possible 1st byte */
  PrefixMap		*prefixes;
  unsigned short	*bytemap;
} Encinfo;

/*
** Header at the start of an encoding map file.  All shorts and ints are
** stored in network order (see the format description below).
*/
typedef struct encmaphdr
{
  unsigned int		magic;		/* should match ENCMAP_MAGIC */
  char			name[40];	/* IANA registered encoding name */
  unsigned short	pfsize;		/* number of PrefixMaps; zero for
					   single byte encodings */
  unsigned short	bmsize;		/* number of shorts in the overall
					   map */
  int			map[256];	/* per-1st-byte map; see below */
} Encmap_Header;

/*================================================================
** Structure of Encoding map binary encoding
**
** Note that all shorts and ints are in network order,
** so when packing or unpacking with perl, use 'n' and 'N' respectively.
** In C, use the htonl family of functions.
**
** The basic structure is:
**
**	_______________________
**	|Header (including map expat needs for 1st byte)
**	|PrefixMap * pfsize
**	|   This section isn't included for single-byte encodings.
**	|   For multiple byte encodings, when a byte represents a prefix
**	|   then it indexes into this vector instead of mapping to a
**	|   Unicode character. The PrefixMap type is declared above. The
**	|   ispfx and ischar fields are bitvectors indicating whether
**	|   the byte being mapped is a prefix or character respectively.
**	|   If neither is set, then the character is not mapped to Unicode.
**	|
**	|   The min field is the 1st byte mapped for this prefix; the
**	|   len field is the number of bytes mapped; and bmap_start is
**	|   the starting index of the map for this prefix in the overall
**	|   map (next section).
**	|unsigned short * bmsize
**	|   This section also is omitted for single-byte encodings.
**	|   Each short is either a Unicode scalar or an index into the
**	|   PrefixMap vector.
**
** The header for these files is declared above as the Encmap_Header type.
** The magic field is a magic number which should match the ENCMAP_MAGIC
** macro above. The next 40 bytes store the IANA registered name for the
** encoding. The pfsize field holds the number of PrefixMaps, which should
** be zero for single byte encodings. The bmsize field holds the number of
** shorts used for the overall map.
**
** The map field contains either the Unicode scalar encoded by the 1st byte
** or -n where n is the number of bytes that such a 1st byte implies (Expat
** requires that the number of bytes to encode a character is indicated by
** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
**
** If the encoding is a multiple byte encoding, then there will be PrefixMap
** and character map sections. The 1st PrefixMap (index 0) covers a range
** of bytes that includes all 1st byte prefixes.
**
** Look at convert_to_unicode in Expat.xs to see how this data structure
** is used.
*/

#endif /* ndef ENCODING_H */