1/* Copyright (C) 1999-2001, 2003, 2011 Bruno Haible. 2 This file is not part of the GNU LIBICONV Library. 3 This file is put into the public domain. */ 4 5#include "iconv_string.h" 6#include <iconv.h> 7#include <errno.h> 8#include <stdlib.h> 9#include <string.h> 10 11#define tmpbufsize 4096 12 13int iconv_string (const char* tocode, const char* fromcode, 14 const char* start, const char* end, 15 char** resultp, size_t* lengthp) 16{ 17 iconv_t cd = iconv_open(tocode,fromcode); 18 size_t length; 19 char* result; 20 if (cd == (iconv_t)(-1)) { 21 if (errno != EINVAL) 22 return -1; 23 /* Unsupported fromcode or tocode. Check whether the caller requested 24 autodetection. */ 25 if (!strcmp(fromcode,"autodetect_utf8")) { 26 int ret; 27 /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would 28 be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */ 29 ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp); 30 if (!(ret < 0 && errno == EILSEQ)) 31 return ret; 32 ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp); 33 return ret; 34 } 35 if (!strcmp(fromcode,"autodetect_jp")) { 36 int ret; 37 /* Try 7-bit encoding first. If the input contains bytes >= 0x80, 38 it will fail. */ 39 ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp); 40 if (!(ret < 0 && errno == EILSEQ)) 41 return ret; 42 /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This 43 is unavoidable. People will condemn SHIFT_JIS. 44 If we tried SHIFT_JIS first, then some short EUC-JP inputs would 45 come out wrong, and people would condemn EUC-JP and Unix, which 46 would not be good. */ 47 ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp); 48 if (!(ret < 0 && errno == EILSEQ)) 49 return ret; 50 /* Finally try SHIFT_JIS. */ 51 ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp); 52 return ret; 53 } 54 if (!strcmp(fromcode,"autodetect_kr")) { 55 int ret; 56 /* Try 7-bit encoding first. If the input contains bytes >= 0x80, 57 it will fail. */ 58 ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp); 59 if (!(ret < 0 && errno == EILSEQ)) 60 return ret; 61 /* Finally try EUC-KR. */ 62 ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp); 63 return ret; 64 } 65 errno = EINVAL; 66 return -1; 67 } 68 /* Determine the length we need. */ 69 { 70 size_t count = 0; 71 char tmpbuf[tmpbufsize]; 72 const char* inptr = start; 73 size_t insize = end-start; 74 while (insize > 0) { 75 char* outptr = tmpbuf; 76 size_t outsize = tmpbufsize; 77 size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); 78 if (res == (size_t)(-1) && errno != E2BIG) { 79 int saved_errno = (errno == EINVAL ? EILSEQ : errno); 80 iconv_close(cd); 81 errno = saved_errno; 82 return -1; 83 } 84 count += outptr-tmpbuf; 85 } 86 { 87 char* outptr = tmpbuf; 88 size_t outsize = tmpbufsize; 89 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); 90 if (res == (size_t)(-1)) { 91 int saved_errno = errno; 92 iconv_close(cd); 93 errno = saved_errno; 94 return -1; 95 } 96 count += outptr-tmpbuf; 97 } 98 length = count; 99 } 100 if (lengthp != NULL) 101 *lengthp = length; 102 if (resultp == NULL) { 103 iconv_close(cd); 104 return 0; 105 } 106 result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length)); 107 *resultp = result; 108 if (length == 0) { 109 iconv_close(cd); 110 return 0; 111 } 112 if (result == NULL) { 113 iconv_close(cd); 114 errno = ENOMEM; 115 return -1; 116 } 117 iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */ 118 /* Do the conversion for real. */ 119 { 120 const char* inptr = start; 121 size_t insize = end-start; 122 char* outptr = result; 123 size_t outsize = length; 124 while (insize > 0) { 125 size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); 126 if (res == (size_t)(-1)) { 127 if (errno == EINVAL) 128 break; 129 else { 130 int saved_errno = errno; 131 iconv_close(cd); 132 errno = saved_errno; 133 return -1; 134 } 135 } 136 } 137 { 138 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); 139 if (res == (size_t)(-1)) { 140 int saved_errno = errno; 141 iconv_close(cd); 142 errno = saved_errno; 143 return -1; 144 } 145 } 146 if (outsize != 0) abort(); 147 } 148 iconv_close(cd); 149 return 0; 150} 151