1/* iconvtc.c -- Interface to iconv transcoding routines
2
3  (c) 1998-2003 (W3C) MIT, ERCIM, Keio University
4  See tidy.h for the copyright notice.
5
6  $Id$
7*/
8
9#include <tidy.h>
10#include "forward.h"
11#include "streamio.h"
12
13#ifdef TIDY_ICONV_SUPPORT
14
15#include <iconv.h>
16
17/* maximum number of bytes for a single character */
18#define TC_INBUFSIZE  16
19
/* size in bytes of the buffer that receives the transcoded output */
21#define TC_OUTBUFSIZE 16
22
23Bool IconvInitInputTranscoder(void)
24{
25    return no;
26}
27
/* Shut down the iconv input transcoder.
   Stub implementation: there is nothing to release, so this is a no-op. */
void IconvUninitInputTranscoder(void)
{
    /* intentionally empty */
}
32
33int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
34{
35    iconv_t cd;
36    TidyInputSource * source;
37    char inbuf[TC_INBUFSIZE] = { 0 };
38    char outbuf[TC_OUTBUFSIZE] = { 0 };
39    size_t inbufsize = 0;
40
41    assert( in != NULL );
42    assert( &in->source != NULL );
43    assert( bytesRead != NULL );
44    assert( in->iconvptr != 0 );
45
46    cd = (iconv_t)in->iconvptr;
47    source = &in->source;
48
49    inbuf[inbufsize++] = (char)firstByte;
50
51    while(inbufsize < TC_INBUFSIZE)
52    {
53        char * outbufptr = (char*)outbuf;
54        char * inbufptr = (char*)inbuf;
55        size_t readNow = inbufsize;
56        size_t writeNow = TC_OUTBUFSIZE;
57        size_t result = 0;
58        int iconv_errno = 0;
59        int nextByte = EndOfStream;
60
61        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
62        iconv_errno = errno;
63
64        if (result != (size_t)(-1))
65        {
66            int c;
67
68            /* create codepoint from UTF-32LE octets */
69            c = (unsigned char)outbuf[0];
70            c += (unsigned char)outbuf[1] << 8;
71            c += (unsigned char)outbuf[2] << 16;
72            c += (unsigned char)outbuf[3] << 32;
73
74            /* set number of read bytes */
75            *bytesRead = inbufsize;
76
77            return c;
78        }
79
80        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
81        assert( iconv_errno != E2BIG );  /* not enough memory         */
82        assert( iconv_errno == EINVAL ); /* incomplete sequence       */
83
84        /* we need more bytes */
85        nextByte = source->getByte(source->sourceData);
86
87        if (nextByte == EndOfStream)
88        {
89            /* todo: error message for broken stream? */
90
91            *bytesRead = inbufsize;
92            return EndOfStream;
93        }
94
95        inbuf[inbufsize++] = (char)nextByte;
96    }
97
98    /* No full character found after reading TC_INBUFSIZE bytes, */
99    /* give up to read this stream, it's obviously unreadable.   */
100
101    /* todo: error message for broken stream? */
102    return EndOfStream;
103}
104
105#endif /* TIDY_ICONV_SUPPORT */
106