1/* iconvtc.c -- Interface to iconv transcoding routines 2 3 (c) 1998-2003 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 $Id: iconvtc.c,v 1.3 2007/01/30 23:46:51 iccir Exp $ 7*/ 8 9#include <tidy.h> 10#include "forward.h" 11#include "streamio.h" 12 13#ifdef TIDY_ICONV_SUPPORT 14 15#include <iconv.h> 16 17/* maximum number of bytes for a single character */ 18#define TC_INBUFSIZE 16 19 20/* maximum number of characters per byte sequence */ 21#define TC_OUTBUFSIZE 16 22 23Bool IconvInitInputTranscoder(void) 24{ 25 return no; 26} 27 28void IconvUninitInputTranscoder(void) 29{ 30 return; 31} 32 33int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead) 34{ 35 iconv_t cd; 36 TidyInputSource * source; 37 char inbuf[TC_INBUFSIZE] = { 0 }; 38 char outbuf[TC_OUTBUFSIZE] = { 0 }; 39 size_t inbufsize = 0; 40 41 assert( in != NULL ); 42 assert( &in->source != NULL ); 43 assert( bytesRead != NULL ); 44 assert( in->iconvptr != 0 ); 45 46 cd = (iconv_t)in->iconvptr; 47 source = &in->source; 48 49 inbuf[inbufsize++] = (char)firstByte; 50 51 while(inbufsize < TC_INBUFSIZE) 52 { 53 char * outbufptr = (char*)outbuf; 54 char * inbufptr = (char*)inbuf; 55 size_t readNow = inbufsize; 56 size_t writeNow = TC_OUTBUFSIZE; 57 size_t result = 0; 58 int iconv_errno = 0; 59 int nextByte = EndOfStream; 60 61 result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow); 62 iconv_errno = errno; 63 64 if (result != (size_t)(-1)) 65 { 66 int c; 67 68 /* create codepoint from UTF-32LE octets */ 69 c = (unsigned char)outbuf[0]; 70 c += (unsigned char)outbuf[1] << 8; 71 c += (unsigned char)outbuf[2] << 16; 72 c += (unsigned char)outbuf[3] << 32; 73 74 /* set number of read bytes */ 75 *bytesRead = inbufsize; 76 77 return c; 78 } 79 80 assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */ 81 assert( iconv_errno != E2BIG ); /* not enough memory */ 82 assert( iconv_errno == EINVAL ); /* incomplete sequence */ 83 84 /* we need more bytes */ 85 nextByte = source->getByte(source->sourceData); 86 87 if (nextByte == EndOfStream) 88 { 89 /* todo: error message for broken stream? */ 90 91 *bytesRead = inbufsize; 92 return EndOfStream; 93 } 94 95 inbuf[inbufsize++] = (char)nextByte; 96 } 97 98 /* No full character found after reading TC_INBUFSIZE bytes, */ 99 /* give up to read this stream, it's obviously unreadable. */ 100 101 /* todo: error message for broken stream? */ 102 return EndOfStream; 103} 104 105#endif /* TIDY_ICONV_SUPPORT */ 106