1/* Look at first character in UTF-8 string. 2 Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU Lesser General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#include <config.h> 19 20#if defined IN_LIBUNISTRING 21/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */ 22# include "unistring-notinline.h" 23#endif 24 25/* Specification. */ 26#include "unistr.h" 27 28#if !HAVE_INLINE 29 30int 31u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) 32{ 33 uint8_t c = *s; 34 35 if (c < 0x80) 36 { 37 *puc = c; 38 return 1; 39 } 40 else if (c >= 0xc2) 41 { 42 if (c < 0xe0) 43 { 44 if (n >= 2) 45 { 46 if ((s[1] ^ 0x80) < 0x40) 47 { 48 *puc = ((unsigned int) (c & 0x1f) << 6) 49 | (unsigned int) (s[1] ^ 0x80); 50 return 2; 51 } 52 /* invalid multibyte character */ 53 } 54 else 55 { 56 /* incomplete multibyte character */ 57 *puc = 0xfffd; 58 return n; 59 } 60 } 61 else if (c < 0xf0) 62 { 63 if (n >= 3) 64 { 65 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 66 && (c >= 0xe1 || s[1] >= 0xa0) 67 && (c != 0xed || s[1] < 0xa0)) 68 { 69 *puc = ((unsigned int) (c & 0x0f) << 12) 70 | ((unsigned int) (s[1] ^ 0x80) << 6) 71 | (unsigned int) (s[2] ^ 0x80); 72 return 3; 73 } 74 /* invalid multibyte character */ 75 } 76 else 77 { 78 /* incomplete multibyte character */ 79 *puc = 0xfffd; 80 return n; 81 } 82 } 83 else if (c < 0xf8) 84 { 85 if (n >= 4) 86 { 87 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 88 && (s[3] ^ 0x80) < 0x40 89 && (c >= 0xf1 || s[1] >= 0x90) 90#if 1 91 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) 92#endif 93 ) 94 { 95 *puc = ((unsigned int) (c & 0x07) << 18) 96 | ((unsigned int) (s[1] ^ 0x80) << 12) 97 | ((unsigned int) (s[2] ^ 0x80) << 6) 98 | (unsigned int) (s[3] ^ 0x80); 99 return 4; 100 } 101 /* invalid multibyte character */ 102 } 103 else 104 { 105 /* incomplete multibyte character */ 106 *puc = 0xfffd; 107 return n; 108 } 109 } 110#if 0 111 else if (c < 0xfc) 112 { 113 if (n >= 5) 114 { 115 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 116 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 117 && (c >= 0xf9 || s[1] >= 0x88)) 118 { 119 *puc = ((unsigned int) (c & 0x03) << 24) 120 | ((unsigned int) (s[1] ^ 0x80) << 18) 121 | ((unsigned int) (s[2] ^ 0x80) << 12) 122 | ((unsigned int) (s[3] ^ 0x80) << 6) 123 | (unsigned int) (s[4] ^ 0x80); 124 return 5; 125 } 126 /* invalid multibyte character */ 127 } 128 else 129 { 130 /* incomplete multibyte character */ 131 *puc = 0xfffd; 132 return n; 133 } 134 } 135 else if (c < 0xfe) 136 { 137 if (n >= 6) 138 { 139 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 140 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 141 && (s[5] ^ 0x80) < 0x40 142 && (c >= 0xfd || s[1] >= 0x84)) 143 { 144 *puc = ((unsigned int) (c & 0x01) << 30) 145 | ((unsigned int) (s[1] ^ 0x80) << 24) 146 | ((unsigned int) (s[2] ^ 0x80) << 18) 147 | ((unsigned int) (s[3] ^ 0x80) << 12) 148 | ((unsigned int) (s[4] ^ 0x80) << 6) 149 | (unsigned int) (s[5] ^ 0x80); 150 return 6; 151 } 152 /* invalid multibyte character */ 153 } 154 else 155 { 156 /* incomplete multibyte character */ 157 *puc = 0xfffd; 158 return n; 159 } 160 } 161#endif 162 } 163 /* invalid multibyte character */ 164 *puc = 0xfffd; 165 return 1; 166} 167 168#endif 169