1/* Look at first character in UTF-8 string. 2 Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#include <config.h> 19 20/* Specification. */ 21#include "unistr.h" 22 23#if !HAVE_INLINE 24 25int 26u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) 27{ 28 uint8_t c = *s; 29 30 if (c < 0x80) 31 { 32 *puc = c; 33 return 1; 34 } 35 else if (c >= 0xc2) 36 { 37 if (c < 0xe0) 38 { 39 if (n >= 2) 40 { 41#if CONFIG_UNICODE_SAFETY 42 if ((s[1] ^ 0x80) < 0x40) 43#endif 44 { 45 *puc = ((unsigned int) (c & 0x1f) << 6) 46 | (unsigned int) (s[1] ^ 0x80); 47 return 2; 48 } 49 /* invalid multibyte character */ 50 } 51 else 52 { 53 /* incomplete multibyte character */ 54 *puc = 0xfffd; 55 return n; 56 } 57 } 58 else if (c < 0xf0) 59 { 60 if (n >= 3) 61 { 62#if CONFIG_UNICODE_SAFETY 63 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 64 && (c >= 0xe1 || s[1] >= 0xa0) 65 && (c != 0xed || s[1] < 0xa0)) 66#endif 67 { 68 *puc = ((unsigned int) (c & 0x0f) << 12) 69 | ((unsigned int) (s[1] ^ 0x80) << 6) 70 | (unsigned int) (s[2] ^ 0x80); 71 return 3; 72 } 73 /* invalid multibyte character */ 74 } 75 else 76 { 77 /* incomplete multibyte character */ 78 *puc = 0xfffd; 79 return n; 80 } 81 } 82 else if (c < 0xf8) 83 { 84 if (n >= 4) 85 { 86#if CONFIG_UNICODE_SAFETY 87 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 88 && (s[3] ^ 0x80) < 0x40 89 && (c >= 0xf1 || s[1] >= 0x90) 90#if 1 91 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) 92#endif 93 ) 94#endif 95 { 96 *puc = ((unsigned int) (c & 0x07) << 18) 97 | ((unsigned int) (s[1] ^ 0x80) << 12) 98 | ((unsigned int) (s[2] ^ 0x80) << 6) 99 | (unsigned int) (s[3] ^ 0x80); 100 return 4; 101 } 102 /* invalid multibyte character */ 103 } 104 else 105 { 106 /* incomplete multibyte character */ 107 *puc = 0xfffd; 108 return n; 109 } 110 } 111#if 0 112 else if (c < 0xfc) 113 { 114 if (n >= 5) 115 { 116#if CONFIG_UNICODE_SAFETY 117 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 118 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 119 && (c >= 0xf9 || s[1] >= 0x88)) 120#endif 121 { 122 *puc = ((unsigned int) (c & 0x03) << 24) 123 | ((unsigned int) (s[1] ^ 0x80) << 18) 124 | ((unsigned int) (s[2] ^ 0x80) << 12) 125 | ((unsigned int) (s[3] ^ 0x80) << 6) 126 | (unsigned int) (s[4] ^ 0x80); 127 return 5; 128 } 129 /* invalid multibyte character */ 130 } 131 else 132 { 133 /* incomplete multibyte character */ 134 *puc = 0xfffd; 135 return n; 136 } 137 } 138 else if (c < 0xfe) 139 { 140 if (n >= 6) 141 { 142#if CONFIG_UNICODE_SAFETY 143 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 144 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 145 && (s[5] ^ 0x80) < 0x40 146 && (c >= 0xfd || s[1] >= 0x84)) 147#endif 148 { 149 *puc = ((unsigned int) (c & 0x01) << 30) 150 | ((unsigned int) (s[1] ^ 0x80) << 24) 151 | ((unsigned int) (s[2] ^ 0x80) << 18) 152 | ((unsigned int) (s[3] ^ 0x80) << 12) 153 | ((unsigned int) (s[4] ^ 0x80) << 6) 154 | (unsigned int) (s[5] ^ 0x80); 155 return 6; 156 } 157 /* invalid multibyte character */ 158 } 159 else 160 { 161 /* incomplete multibyte character */ 162 *puc = 0xfffd; 163 return n; 164 } 165 } 166#endif 167 } 168 /* invalid multibyte character */ 169 *puc = 0xfffd; 170 return 1; 171} 172 173#endif 174