1/* Test of conversion from UTF-8 to legacy encodings. 2 Copyright (C) 2007-2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ 18 19#include <config.h> 20 21#include "uniconv.h" 22 23#include <errno.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include "unistr.h" 28#include "macros.h" 29 30/* Magic number for detecting bounds violations. */ 31#define MAGIC 0x1983EFF1 32 33static size_t * 34new_offsets (size_t n) 35{ 36 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t)); 37 offsets[n] = MAGIC; 38 return offsets; 39} 40 41int 42main () 43{ 44 static enum iconv_ilseq_handler handlers[] = 45 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence }; 46 size_t h; 47 size_t o; 48 size_t i; 49 50#if HAVE_ICONV 51 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1, 52 ISO-8859-2, and UTF-8. */ 53 54 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ 55 for (h = 0; h < SIZEOF (handlers); h++) 56 { 57 enum iconv_ilseq_handler handler = handlers[h]; 58 static const uint8_t input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; 59 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; 60 for (o = 0; o < 2; o++) 61 { 62 size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL); 63 size_t length; 64 char *result = u8_conv_to_encoding ("ISO-8859-1", handler, 65 input, u8_strlen (input), 66 offsets, 67 NULL, &length); 68 ASSERT (result != NULL); 69 ASSERT (length == strlen (expected)); 70 ASSERT (memcmp (result, expected, length) == 0); 71 if (o) 72 { 73 for (i = 0; i < 41; i++) 74 ASSERT (offsets[i] == (i < 1 ? i : 75 i == 1 ? (size_t)(-1) : 76 i < 13 ? i - 1 : 77 i == 13 ? (size_t)(-1) : 78 i < 20 ? i - 2 : 79 i == 20 ? (size_t)(-1) : 80 i < 40 ? i - 3 : 81 i == 40 ? (size_t)(-1) : 82 i - 4)); 83 ASSERT (offsets[41] == MAGIC); 84 free (offsets); 85 } 86 free (result); 87 } 88 } 89 90 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ 91 for (h = 0; h < SIZEOF (handlers); h++) 92 { 93 enum iconv_ilseq_handler handler = handlers[h]; 94 static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* Rafa�� Maszkowski */ 95 for (o = 0; o < 2; o++) 96 { 97 size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL); 98 size_t length = 0xdead; 99 char *result = u8_conv_to_encoding ("ISO-8859-1", handler, 100 input, u8_strlen (input), 101 offsets, 102 NULL, &length); 103 switch (handler) 104 { 105 case iconveh_error: 106 ASSERT (result == NULL); 107 ASSERT (errno == EILSEQ); 108 ASSERT (length == 0xdead); 109 break; 110 case iconveh_question_mark: 111 { 112 static const char expected[] = "Rafa? Maszkowski"; 113 static const char expected_translit[] = "Rafal Maszkowski"; 114 ASSERT (result != NULL); 115 ASSERT (length == strlen (expected)); 116 ASSERT (memcmp (result, expected, length) == 0 117 || memcmp (result, expected_translit, length) == 0); 118 if (o) 119 { 120 for (i = 0; i < 17; i++) 121 ASSERT (offsets[i] == (i < 5 ? i : 122 i == 5 ? (size_t)(-1) : 123 i - 1)); 124 ASSERT (offsets[17] == MAGIC); 125 free (offsets); 126 } 127 free (result); 128 } 129 break; 130 case iconveh_escape_sequence: 131 { 132 static const char expected[] = "Rafa\\u0142 Maszkowski"; 133 ASSERT (result != NULL); 134 ASSERT (length == strlen (expected)); 135 ASSERT (memcmp (result, expected, length) == 0); 136 if (o) 137 { 138 for (i = 0; i < 17; i++) 139 ASSERT (offsets[i] == (i < 5 ? i : 140 i == 5 ? (size_t)(-1) : 141 i + 4)); 142 ASSERT (offsets[17] == MAGIC); 143 free (offsets); 144 } 145 free (result); 146 } 147 break; 148 } 149 } 150 } 151 152 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */ 153 for (h = 0; h < SIZEOF (handlers); h++) 154 { 155 enum iconv_ilseq_handler handler = handlers[h]; 156 static const uint8_t input[] = "\342"; 157 for (o = 0; o < 2; o++) 158 { 159 size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL); 160 size_t length; 161 char *result = u8_conv_to_encoding ("ISO-8859-1", handler, 162 input, u8_strlen (input), 163 offsets, 164 NULL, &length); 165 ASSERT (result != NULL); 166 ASSERT (length == strlen ("")); 167 if (o) 168 { 169 ASSERT (offsets[0] == 0); 170 ASSERT (offsets[1] == MAGIC); 171 free (offsets); 172 } 173 free (result); 174 } 175 } 176 177#endif 178 179 return 0; 180} 181