1/* 2 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* 22 * UTF-32 23 */ 24 25/* Specification: Unicode 3.1 Standard Annex #19 */ 26 27/* Here we accept FFFE0000/0000FEFF marks as endianness indicators 28 everywhere in the stream, not just at the beginning. (This is contrary 29 to what #19 D36c specifies, but it allows concatenation of byte 30 sequences to work flawlessly, while disagreeing with #19 behaviour 31 only for strings containing U+FEFF characters, which is quite rare.) 32 The default is big-endian. */ 33/* The state is 0 if big-endian, 1 if little-endian. */ 34static int 35utf32_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 36{ 37 state_t state = conv->istate; 38 int count = 0; 39 for (; n >= 4;) { 40 ucs4_t wc = (state 41 ? s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24) 42 : (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]); 43 count += 4; 44 if (wc == 0x0000feff) { 45 } else if (wc == 0xfffe0000u) { 46 state ^= 1; 47 } else { 48 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 49 *pwc = wc; 50 conv->istate = state; 51 return count; 52 } else { 53 conv->istate = state; 54 return RET_SHIFT_ILSEQ(count); 55 } 56 } 57 s += 4; n -= 4; 58 } 59 conv->istate = state; 60 return RET_TOOFEW(count); 61} 62 63/* We output UTF-32 in big-endian order, with byte-order mark. */ 64/* The state is 0 at the beginning, 1 after the BOM has been written. */ 65static int 66utf32_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 67{ 68 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 69 int count = 0; 70 if (!conv->ostate) { 71 if (n >= 4) { 72 r[0] = 0x00; 73 r[1] = 0x00; 74 r[2] = 0xFE; 75 r[3] = 0xFF; 76 r += 4; n -= 4; count += 4; 77 } else 78 return RET_TOOSMALL; 79 } 80 if (wc < 0x110000) { 81 if (n >= 4) { 82 r[0] = 0; 83 r[1] = (unsigned char) (wc >> 16); 84 r[2] = (unsigned char) (wc >> 8); 85 r[3] = (unsigned char) wc; 86 conv->ostate = 1; 87 return count+4; 88 } else 89 return RET_TOOSMALL; 90 } 91 } 92 return RET_ILUNI; 93} 94