1/* Test of conversion from UTF-8 to legacy encodings.
2   Copyright (C) 2007-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
18
19#include <config.h>
20
21#include "uniconv.h"
22
23#include <errno.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include "unistr.h"
28#include "macros.h"
29
/* Magic number for detecting bounds violations.  */
#define MAGIC 0x1983EFF1

/* Allocate an array of N + 1 size_t elements and store MAGIC in the extra
   element at index N, so that the caller can later check that nothing wrote
   past the first N entries.  The first N entries are left uninitialized.
   The caller owns the returned array and releases it with free ().
   Aborts the program if the requested size cannot be represented or if the
   allocation fails; never returns NULL.  */
static size_t *
new_offsets (size_t n)
{
  size_t *offsets;

  /* Refuse sizes for which (n + 1) * sizeof (size_t) would wrap around.  */
  if (n >= (size_t) -1 / sizeof *offsets)
    abort ();

  offsets = (size_t *) malloc ((n + 1) * sizeof *offsets);
  /* Without this check an out-of-memory condition would turn into a write
     through a null pointer on the next line.  */
  if (offsets == NULL)
    abort ();

  offsets[n] = MAGIC;
  return offsets;
}
40
/* Test driver for u8_conv_to_encoding () with ISO-8859-1 as target encoding.
   Three scenarios are checked, each with every handler in HANDLERS and both
   without (o == 0) and with (o == 1) an offsets array:
     - input that converts without errors,
     - input containing a character not representable in ISO-8859-1 (EILSEQ),
     - input consisting of an incomplete UTF-8 sequence (EINVAL).
   When HAVE_ICONV is not set, nothing is tested.
   Returns 0 when the end of the function is reached.  */
int
main (void)
{
#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;
  size_t o;
  size_t i;

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      for (o = 0; o < 2; o++)
        {
          /* Second pass only: request offsets, with a MAGIC guard element
             stored right after the last input position.  */
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          ASSERT (result != NULL);
          ASSERT (length == strlen (expected));
          ASSERT (memcmp (result, expected, length) == 0);
          if (o)
            {
              /* The input is 41 bytes long.  Expected: the first byte of
                 each character maps to that character's position in the
                 result, and the second byte of each two-byte character
                 (input indices 1, 13, 20, 40) maps to (size_t)(-1).  */
              for (i = 0; i < 41; i++)
                ASSERT (offsets[i] == (i < 1 ? i :
                                       i == 1 ? (size_t)(-1) :
                                       i < 13 ? i - 1 :
                                       i == 13 ? (size_t)(-1) :
                                       i < 20 ? i - 2 :
                                       i == 20 ? (size_t)(-1) :
                                       i < 40 ? i - 3 :
                                       i == 40 ? (size_t)(-1) :
                                       i - 4));
              /* The guard element must be untouched.  */
              ASSERT (offsets[41] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          /* Sentinel value, used below to check that LENGTH is left alone
             when the conversion fails.  */
          size_t length = 0xdead;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          switch (handler)
            {
            case iconveh_error:
              ASSERT (result == NULL);
              ASSERT (errno == EILSEQ);
              ASSERT (length == 0xdead);
              /* There is no result to release, but the offsets array that
                 was allocated for o == 1 must still be freed; free (NULL)
                 is a no-op for o == 0.  */
              free (offsets);
              break;
            case iconveh_question_mark:
              {
                static const char expected[] = "Rafa? Maszkowski";
                /* Also accepted: a result in which the character was
                   replaced by a plain 'l' instead of a question mark.  */
                static const char expected_translit[] = "Rafal Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0
                        || memcmp (result, expected_translit, length) == 0);
                if (o)
                  {
                    /* The two-byte character at input index 4 becomes one
                       output byte, so later positions shift by -1.  */
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i - 1));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            case iconveh_escape_sequence:
              {
                static const char expected[] = "Rafa\\u0142 Maszkowski";
                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0);
                if (o)
                  {
                    /* The two-byte character at input index 4 becomes the
                       six-byte escape sequence, so later positions shift
                       by +4.  */
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i + 4));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            }
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      /* A single lead byte with no continuation bytes following it.  */
      static const uint8_t input[] = "\342";
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);
          /* Expected: the incomplete sequence yields an empty, non-NULL
             result rather than a failure.  */
          ASSERT (result != NULL);
          ASSERT (length == strlen (""));
          if (o)
            {
              ASSERT (offsets[0] == 0);
              ASSERT (offsets[1] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

#endif

  return 0;
}
181