1#ifndef lint 2static char *rcsid = "$Id: race.c,v 1.1 2003/06/04 00:26:07 marka Exp $"; 3#endif 4 5/* 6 * Copyright (c) 2000,2001,2002 Japan Network Information Center. 7 * All rights reserved. 8 * 9 * By using this file, you agree to the terms and conditions set forth bellow. 10 * 11 * LICENSE TERMS AND CONDITIONS 12 * 13 * The following License Terms and Conditions apply, unless a different 14 * license is obtained from Japan Network Information Center ("JPNIC"), 15 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 16 * Chiyoda-ku, Tokyo 101-0047, Japan. 17 * 18 * 1. Use, Modification and Redistribution (including distribution of any 19 * modified or derived work) in source and/or binary forms is permitted 20 * under this License Terms and Conditions. 21 * 22 * 2. Redistribution of source code must retain the copyright notices as they 23 * appear in each source code file, this License Terms and Conditions. 24 * 25 * 3. Redistribution in binary form must reproduce the Copyright Notice, 26 * this License Terms and Conditions, in the documentation and/or other 27 * materials provided with the distribution. For the purposes of binary 28 * distribution the "Copyright Notice" refers to the following language: 29 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 30 * 31 * 4. The name of JPNIC may not be used to endorse or promote products 32 * derived from this Software without specific prior written approval of 33 * JPNIC. 34 * 35 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 38 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 40 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 41 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 42 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 43 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 44 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 45 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 46 */ 47 48#include <config.h> 49 50#include <stddef.h> 51#include <stdlib.h> 52#include <string.h> 53 54#include <idn/result.h> 55#include <idn/assert.h> 56#include <idn/logmacro.h> 57#include <idn/converter.h> 58#include <idn/ucs4.h> 59#include <idn/debug.h> 60#include <idn/race.h> 61#include <idn/util.h> 62 63#ifndef IDN_RACE_PREFIX 64#define IDN_RACE_PREFIX "bq--" 65#endif 66#define RACE_2OCTET_MODE 0xd8 67#define RACE_ESCAPE 0xff 68#define RACE_ESCAPE_2ND 0x99 69 70#define RACE_BUF_SIZE 128 /* more than enough */ 71 72/* 73 * Unicode surrogate pair. 74 */ 75#define IS_SURROGATE_HIGH(v) (0xd800 <= (v) && (v) <= 0xdbff) 76#define IS_SURROGATE_LOW(v) (0xdc00 <= (v) && (v) <= 0xdfff) 77#define SURROGATE_HIGH(v) (SURROGATE_H_OFF + (((v) - 0x10000) >> 10)) 78#define SURROGATE_LOW(v) (SURROGATE_L_OFF + ((v) & 0x3ff)) 79#define SURROGATE_BASE 0x10000 80#define SURROGATE_H_OFF 0xd800 81#define SURROGATE_L_OFF 0xdc00 82#define COMBINE_SURROGATE(h, l) \ 83 (SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF)) 84 85/* 86 * Compression type. 87 */ 88enum { 89 compress_one, /* all characters are in a single row */ 90 compress_two, /* row 0 and another row */ 91 compress_none /* nope */ 92}; 93 94static idn_result_t race_decode_decompress(const char *from, 95 unsigned short *buf, 96 size_t buflen); 97static idn_result_t race_compress_encode(const unsigned short *p, 98 int compress_mode, 99 char *to, size_t tolen); 100static int get_compress_mode(unsigned short *p); 101 102idn_result_t 103idn__race_decode(idn_converter_t ctx, void *privdata, 104 const char *from, unsigned long *to, size_t tolen) { 105 unsigned short *buf = NULL; 106 size_t prefixlen = strlen(IDN_RACE_PREFIX); 107 size_t fromlen; 108 size_t buflen; 109 idn_result_t r; 110 111 assert(ctx != NULL); 112 113 TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n", 114 idn__debug_xstring(from, 50), (int)tolen)); 115 116 if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) { 117 if (*from == '\0') { 118 r = idn_ucs4_utf8toucs4(from, to, tolen); 119 goto ret; 120 } 121 r = idn_invalid_encoding; 122 goto ret; 123 } 124 from += prefixlen; 125 fromlen = strlen(from); 126 127 /* 128 * Allocate sufficient buffer. 129 */ 130 buflen = fromlen + 1; 131 buf = malloc(sizeof(*buf) * buflen); 132 if (buf == NULL) { 133 r = idn_nomemory; 134 goto ret; 135 } 136 137 /* 138 * Decode base32 and decompress. 139 */ 140 r = race_decode_decompress(from, buf, buflen); 141 if (r != idn_success) 142 goto ret; 143 144 /* 145 * Now 'buf' points the decompressed string, which must contain 146 * UTF-16 characters. 147 */ 148 149 /* 150 * Convert to UCS4. 151 */ 152 r = idn_ucs4_utf16toucs4(buf, to, tolen); 153 if (r != idn_success) 154 goto ret; 155 156ret: 157 free(buf); 158 if (r == idn_success) { 159 TRACE(("idn__race_decode(): succcess (to=\"%s\")\n", 160 idn__debug_ucs4xstring(to, 50))); 161 } else { 162 TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r))); 163 } 164 return (r); 165} 166 167static idn_result_t 168race_decode_decompress(const char *from, unsigned short *buf, size_t buflen) 169{ 170 unsigned short *p = buf; 171 unsigned int bitbuf = 0; 172 int bitlen = 0; 173 int i, j; 174 size_t len; 175 176 while (*from != '\0') { 177 int c = *from++; 178 int x; 179 180 if ('a' <= c && c <= 'z') 181 x = c - 'a'; 182 else if ('A' <= c && c <= 'Z') 183 x = c - 'A'; 184 else if ('2' <= c && c <= '7') 185 x = c - '2' + 26; 186 else 187 return (idn_invalid_encoding); 188 189 bitbuf = (bitbuf << 5) + x; 190 bitlen += 5; 191 if (bitlen >= 8) { 192 *p++ = (bitbuf >> (bitlen - 8)) & 0xff; 193 bitlen -= 8; 194 } 195 } 196 len = p - buf; 197 198 /* 199 * Now 'buf' holds the decoded string. 200 */ 201 202 /* 203 * Decompress. 204 */ 205 if (buf[0] == RACE_2OCTET_MODE) { 206 if ((len - 1) % 2 != 0) 207 return (idn_invalid_encoding); 208 for (i = 1, j = 0; i < len; i += 2, j++) 209 buf[j] = (buf[i] << 8) + buf[i + 1]; 210 len = j; 211 } else { 212 unsigned short c = buf[0] << 8; /* higher octet */ 213 214 for (i = 1, j = 0; i < len; j++) { 215 if (buf[i] == RACE_ESCAPE) { 216 if (i + 1 >= len) 217 return (idn_invalid_encoding); 218 else if (buf[i + 1] == RACE_ESCAPE_2ND) 219 buf[j] = c | 0xff; 220 else 221 buf[j] = buf[i + 1]; 222 i += 2; 223 224 } else if (buf[i] == 0x99 && c == 0x00) { 225 /* 226 * The RACE specification says this is error. 227 */ 228 return (idn_invalid_encoding); 229 230 } else { 231 buf[j] = c | buf[i++]; 232 } 233 } 234 len = j; 235 } 236 buf[len] = '\0'; 237 238 return (idn_success); 239} 240 241idn_result_t 242idn__race_encode(idn_converter_t ctx, void *privdata, 243 const unsigned long *from, char *to, size_t tolen) { 244 char *to_org = to; 245 unsigned short *p, *buf = NULL; 246 size_t prefixlen = strlen(IDN_RACE_PREFIX); 247 size_t buflen; 248 size_t fromlen; 249 idn_result_t r; 250 int compress_mode; 251 252 assert(ctx != NULL); 253 254 TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n", 255 idn__debug_ucs4xstring(from, 50), (int)tolen)); 256 257 if (*from == '\0') { 258 r = idn_ucs4_ucs4toutf8(from, to, tolen); 259 goto ret; 260 } else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) { 261 r = idn_prohibited; 262 goto ret; 263 } 264 265 if (tolen < prefixlen) { 266 r = idn_buffer_overflow; 267 goto ret; 268 } 269 memcpy(to, IDN_RACE_PREFIX, prefixlen); 270 to += prefixlen; 271 tolen -= prefixlen; 272 273 fromlen = idn_ucs4_strlen(from); 274 buflen = fromlen * 2 + 2; 275 276 /* 277 * Convert to UTF-16. 278 * Preserve space for a character at the top of the buffer. 279 */ 280 for (;;) { 281 unsigned short *new_buf; 282 283 new_buf = realloc(buf, sizeof(*buf) * buflen); 284 if (new_buf == NULL) { 285 r = idn_nomemory; 286 goto ret; 287 } 288 buf = new_buf; 289 290 r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1); 291 if (r == idn_success) 292 break; 293 else if (r != idn_buffer_overflow) 294 goto ret; 295 296 buflen = fromlen * 2 + 2; 297 } 298 p = buf + 1; 299 300 /* 301 * Now 'p' contains UTF-16 encoded string. 302 */ 303 304 /* 305 * Check U+0099. 306 * RACE doesn't permit U+0099 in an input string. 307 */ 308 for (p = buf + 1; *p != '\0'; p++) { 309 if (*p == 0x0099) { 310 r = idn_invalid_encoding; 311 goto ret; 312 } 313 } 314 315 /* 316 * Compress, encode in base-32 and output. 317 */ 318 compress_mode = get_compress_mode(buf + 1); 319 r = race_compress_encode(buf, compress_mode, to, tolen); 320 321ret: 322 free(buf); 323 if (r == idn_success) { 324 TRACE(("idn__race_encode(): succcess (to=\"%s\")\n", 325 idn__debug_xstring(to_org, 50))); 326 } else { 327 TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r))); 328 } 329 return (r); 330} 331 332static idn_result_t 333race_compress_encode(const unsigned short *p, int compress_mode, 334 char *to, size_t tolen) 335{ 336 unsigned long bitbuf = *p++; /* bit stream buffer */ 337 int bitlen = 8; /* # of bits in 'bitbuf' */ 338 339 while (*p != '\0' || bitlen > 0) { 340 unsigned int c = *p; 341 342 if (c == '\0') { 343 /* End of data. Flush. */ 344 bitbuf <<= (5 - bitlen); 345 bitlen = 5; 346 } else if (compress_mode == compress_none) { 347 /* Push 16 bit data. */ 348 bitbuf = (bitbuf << 16) | c; 349 bitlen += 16; 350 p++; 351 } else {/* compress_mode == compress_one/compress_two */ 352 /* Push 8 or 16 bit data. */ 353 if (compress_mode == compress_two && 354 (c & 0xff00) == 0) { 355 /* Upper octet is zero (and not U1). */ 356 bitbuf = (bitbuf << 16) | 0xff00 | c; 357 bitlen += 16; 358 } else if ((c & 0xff) == 0xff) { 359 /* Lower octet is 0xff. */ 360 bitbuf = (bitbuf << 16) | 361 (RACE_ESCAPE << 8) | RACE_ESCAPE_2ND; 362 bitlen += 16; 363 } else { 364 /* Just output lower octet. */ 365 bitbuf = (bitbuf << 8) | (c & 0xff); 366 bitlen += 8; 367 } 368 p++; 369 } 370 371 /* 372 * Output bits in 'bitbuf' in 5-bit unit. 373 */ 374 while (bitlen >= 5) { 375 int x; 376 377 /* Get top 5 bits. */ 378 x = (bitbuf >> (bitlen - 5)) & 0x1f; 379 bitlen -= 5; 380 381 /* Encode. */ 382 if (x < 26) 383 x += 'a'; 384 else 385 x = (x - 26) + '2'; 386 387 if (tolen < 1) 388 return (idn_buffer_overflow); 389 390 *to++ = x; 391 tolen--; 392 } 393 } 394 395 if (tolen <= 0) 396 return (idn_buffer_overflow); 397 398 *to = '\0'; 399 return (idn_success); 400} 401 402static int 403get_compress_mode(unsigned short *p) { 404 int zero = 0; 405 unsigned int upper = 0; 406 unsigned short *modepos = p - 1; 407 408 while (*p != '\0') { 409 unsigned int hi = *p++ & 0xff00; 410 411 if (hi == 0) { 412 zero++; 413 } else if (hi == upper) { 414 ; 415 } else if (upper == 0) { 416 upper = hi; 417 } else { 418 *modepos = RACE_2OCTET_MODE; 419 return (compress_none); 420 } 421 } 422 *modepos = upper >> 8; 423 if (upper > 0 && zero > 0) 424 return (compress_two); 425 else 426 return (compress_one); 427} 428