1/* Look at first character in UTF-8 string. 2 Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software 3 Foundation, Inc. 4 Written by Bruno Haible <bruno@clisp.org>, 2002. 5 6 This program is free software: you can redistribute it and/or modify it 7 under the terms of the GNU Lesser General Public License as published 8 by the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19#include <config.h> 20 21/* Specification. */ 22#include "unistr.h" 23 24int 25u8_strmbtouc (ucs4_t *puc, const uint8_t *s) 26{ 27 /* Keep in sync with unistr.h and utf8-ucs4.c. */ 28 uint8_t c = *s; 29 30 if (c < 0x80) 31 { 32 *puc = c; 33 return (c != 0 ? 1 : 0); 34 } 35 if (c >= 0xc2) 36 { 37 if (c < 0xe0) 38 { 39#if CONFIG_UNICODE_SAFETY 40 if ((s[1] ^ 0x80) < 0x40) 41#else 42 if (s[1] != 0) 43#endif 44 { 45 *puc = ((unsigned int) (c & 0x1f) << 6) 46 | (unsigned int) (s[1] ^ 0x80); 47 return 2; 48 } 49 } 50 else if (c < 0xf0) 51 { 52#if CONFIG_UNICODE_SAFETY 53 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 54 && (c >= 0xe1 || s[1] >= 0xa0) 55 && (c != 0xed || s[1] < 0xa0)) 56#else 57 if (s[1] != 0 && s[2] != 0) 58#endif 59 { 60 *puc = ((unsigned int) (c & 0x0f) << 12) 61 | ((unsigned int) (s[1] ^ 0x80) << 6) 62 | (unsigned int) (s[2] ^ 0x80); 63 return 3; 64 } 65 } 66 else if (c < 0xf8) 67 { 68#if CONFIG_UNICODE_SAFETY 69 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 70 && (s[3] ^ 0x80) < 0x40 71 && (c >= 0xf1 || s[1] >= 0x90) 72#if 1 73 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) 74#endif 75 ) 76#else 77 if (s[1] != 0 && s[2] != 0 && s[3] != 0) 78#endif 79 { 80 *puc = ((unsigned int) (c & 0x07) << 18) 81 | ((unsigned int) (s[1] ^ 0x80) << 12) 82 | ((unsigned int) (s[2] ^ 0x80) << 6) 83 | (unsigned int) (s[3] ^ 0x80); 84 return 4; 85 } 86 } 87#if 0 88 else if (c < 0xfc) 89 { 90#if CONFIG_UNICODE_SAFETY 91 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 92 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 93 && (c >= 0xf9 || s[1] >= 0x88)) 94#else 95 if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0) 96#endif 97 { 98 *puc = ((unsigned int) (c & 0x03) << 24) 99 | ((unsigned int) (s[1] ^ 0x80) << 18) 100 | ((unsigned int) (s[2] ^ 0x80) << 12) 101 | ((unsigned int) (s[3] ^ 0x80) << 6) 102 | (unsigned int) (s[4] ^ 0x80); 103 return 5; 104 } 105 } 106 else if (c < 0xfe) 107 { 108#if CONFIG_UNICODE_SAFETY 109 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 110 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 111 && (s[5] ^ 0x80) < 0x40 112 && (c >= 0xfd || s[1] >= 0x84)) 113#else 114 if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0) 115#endif 116 { 117 *puc = ((unsigned int) (c & 0x01) << 30) 118 | ((unsigned int) (s[1] ^ 0x80) << 24) 119 | ((unsigned int) (s[2] ^ 0x80) << 18) 120 | ((unsigned int) (s[3] ^ 0x80) << 12) 121 | ((unsigned int) (s[4] ^ 0x80) << 6) 122 | (unsigned int) (s[5] ^ 0x80); 123 return 6; 124 } 125 } 126#endif 127 } 128 /* invalid or incomplete multibyte character */ 129 return -1; 130} 131