a_utf8.c revision 296465
1/* crypto/asn1/a_utf8.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. 
If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59#include <stdio.h> 60#include "cryptlib.h" 61#include <openssl/asn1.h> 62 63/* UTF8 utilities */ 64 65/*- 66 * This parses a UTF8 string one character at a time. It is passed a pointer 67 * to the string and the length of the string. It sets 'value' to the value of 68 * the current character. It returns the number of characters read or a 69 * negative error code: 70 * -1 = string too short 71 * -2 = illegal character 72 * -3 = subsequent characters not of the form 10xxxxxx 73 * -4 = character encoded incorrectly (not minimal length). 
/*-
 * UTF8_getc() decodes the single character at the start of a UTF8 string.
 *
 *   str  points at the first byte of the encoded character
 *   len  is the number of bytes available at 'str'
 *   val  receives the decoded character value on success
 *
 * Encodings of one to six bytes are understood, so values of up to 31 bits
 * can be returned; no range check other than the minimal-length test is
 * applied to the decoded value.
 *
 * Returns the number of bytes consumed (1 to 6), 0 when 'len' is not
 * positive, or a negative error code:
 *   -1 = string too short
 *   -2 = illegal character (first byte cannot start a character)
 *   -3 = subsequent characters not of the form 10xxxxxx
 *   -4 = character encoded incorrectly (not minimal length).
 * '*val' is only written when a positive count is returned.
 */

int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
    unsigned char lead;
    unsigned long value;
    unsigned long min_value;    /* smallest value that needs 'nbytes' bytes */
    int nbytes;
    int i;

    if (len <= 0)
        return 0;

    lead = str[0];

    /* Single byte form: 0xxxxxxx */
    if ((lead & 0x80) == 0) {
        *val = (unsigned long)(lead & 0x7f);
        return 1;
    }

    /*
     * Multi byte forms: the first byte announces the total length and
     * carries the most significant payload bits.
     */
    if ((lead & 0xe0) == 0xc0) {
        nbytes = 2;
        value = (unsigned long)(lead & 0x1f);
        min_value = 0x80;
    } else if ((lead & 0xf0) == 0xe0) {
        nbytes = 3;
        value = (unsigned long)(lead & 0x0f);
        min_value = 0x800;
    } else if ((lead & 0xf8) == 0xf0) {
        nbytes = 4;
        value = (unsigned long)(lead & 0x07);
        min_value = 0x10000;
    } else if ((lead & 0xfc) == 0xf8) {
        nbytes = 5;
        value = (unsigned long)(lead & 0x03);
        min_value = 0x200000;
    } else if ((lead & 0xfe) == 0xfc) {
        nbytes = 6;
        value = (unsigned long)(lead & 0x01);
        min_value = 0x4000000;
    } else {
        return -2;
    }

    /* The length test comes first so no byte past 'len' is ever read */
    if (len < nbytes)
        return -1;

    /* Every following byte must be 10xxxxxx and adds six payload bits */
    for (i = 1; i < nbytes; i++) {
        if ((str[i] & 0xc0) != 0x80)
            return -3;
        value = (value << 6) | (unsigned long)(str[i] & 0x3f);
    }

    /* Reject values that would have fitted in a shorter encoding */
    if (value < min_value)
        return -4;

    *val = value;
    return nbytes;
}

/*
 * This takes a character 'value' and writes the UTF8 encoded value in 'str'
 * where 'str' is a buffer containing 'len' characters. Returns the number of
 * characters written or -1 if 'len' is too small (this includes any 'len'
 * that is not positive). 'str' can be set to NULL in which case 'len' is
 * ignored and it just returns the number of characters. It will need at
 * most 6 characters.
 *
 * NOTE: the six byte form holds 31 bits. Where 'unsigned long' is wider
 * than that, bits above bit 30 of 'value' are not encoded; the return value
 * is still 6.
 */

int UTF8_putc(unsigned char *str, int len, unsigned long value)
{
    /* Indexed by encoded length: marker bits and payload mask of byte 0 */
    static const unsigned char lead_tag[7] =
        { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
    static const unsigned char lead_mask[7] =
        { 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
    int nbytes;
    int i;

    /* Work out how many bytes the encoding of 'value' occupies */
    if (value < 0x80)
        nbytes = 1;
    else if (value < 0x800)
        nbytes = 2;
    else if (value < 0x10000)
        nbytes = 3;
    else if (value < 0x200000)
        nbytes = 4;
    else if (value < 0x4000000)
        nbytes = 5;
    else
        nbytes = 6;

    /* Length query only: nothing is written */
    if (str == NULL)
        return nbytes;

    /* nbytes is at least 1, so this also rejects len <= 0 */
    if (len < nbytes)
        return -1;

    /* Fill in the 10xxxxxx bytes from the last one back towards the first */
    for (i = nbytes - 1; i > 0; i--) {
        str[i] = (unsigned char)((value & 0x3f) | 0x80);
        value >>= 6;
    }

    /* What is left of 'value' goes into the first byte below its marker */
    str[0] = (unsigned char)((value & lead_mask[nbytes]) | lead_tag[nbytes]);

    return nbytes;
}