1/* $NetBSD$ */ 2 3/* 4 * Copyright (c) 2004 Kungliga Tekniska H��gskolan 5 * (Royal Institute of Technology, Stockholm, Sweden). 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * 3. Neither the name of the Institute nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#ifdef HAVE_CONFIG_H 37#include <config.h> 38#endif 39#include "windlocl.h" 40 41#include <assert.h> 42#include <stdlib.h> 43#include <errno.h> 44#include <stdio.h> 45 46#include <krb5/roken.h> 47 48#include "normalize_table.h" 49 50static int 51translation_cmp(const void *key, const void *data) 52{ 53 const struct translation *t1 = (const struct translation *)key; 54 const struct translation *t2 = (const struct translation *)data; 55 56 return t1->key - t2->key; 57} 58 59enum { s_base = 0xAC00}; 60enum { s_count = 11172}; 61enum { l_base = 0x1100}; 62enum { l_count = 19}; 63enum { v_base = 0x1161}; 64enum { v_count = 21}; 65enum { t_base = 0x11A7}; 66enum { t_count = 28}; 67enum { n_count = v_count * t_count}; 68 69static int 70hangul_decomp(const uint32_t *in, size_t in_len, 71 uint32_t *out, size_t *out_len) 72{ 73 uint32_t u = *in; 74 unsigned s_index; 75 unsigned l, v, t; 76 unsigned o; 77 78 if (u < s_base || u >= s_base + s_count) 79 return 0; 80 s_index = u - s_base; 81 l = l_base + s_index / n_count; 82 v = v_base + (s_index % n_count) / t_count; 83 t = t_base + s_index % t_count; 84 o = 2; 85 if (t != t_base) 86 ++o; 87 if (*out_len < o) 88 return WIND_ERR_OVERRUN; 89 out[0] = l; 90 out[1] = v; 91 if (t != t_base) 92 out[2] = t; 93 *out_len = o; 94 return 1; 95} 96 97static uint32_t 98hangul_composition(const uint32_t *in, size_t in_len) 99{ 100 if (in_len < 2) 101 return 0; 102 if (in[0] >= l_base && in[0] < l_base + l_count) { 103 unsigned l_index = in[0] - l_base; 104 unsigned v_index; 105 106 if (in[1] < v_base || in[1] >= v_base + v_count) 107 return 0; 108 v_index = in[1] - v_base; 109 return (l_index * v_count + v_index) * t_count + s_base; 110 } else if (in[0] >= s_base && in[0] < s_base + s_count) { 111 unsigned s_index = in[0] - s_base; 112 unsigned t_index; 113 114 if (s_index % t_count != 0) 115 return 0; 116 if (in[1] < t_base || in[1] >= t_base + t_count) 117 return 0; 118 t_index = in[1] - t_base; 119 return in[0] + t_index; 120 } 121 return 0; 122} 123 124static int 125compat_decomp(const uint32_t *in, size_t in_len, 126 uint32_t *out, size_t *out_len) 127{ 128 unsigned i; 129 unsigned o = 0; 130 131 for (i = 0; i < in_len; ++i) { 132 struct translation ts = {in[i]}; 133 size_t sub_len = *out_len - o; 134 int ret; 135 136 ret = hangul_decomp(in + i, in_len - i, 137 out + o, &sub_len); 138 if (ret) { 139 if (ret == WIND_ERR_OVERRUN) 140 return ret; 141 o += sub_len; 142 } else { 143 void *s = bsearch(&ts, 144 _wind_normalize_table, 145 _wind_normalize_table_size, 146 sizeof(_wind_normalize_table[0]), 147 translation_cmp); 148 if (s != NULL) { 149 const struct translation *t = (const struct translation *)s; 150 151 ret = compat_decomp(_wind_normalize_val_table + t->val_offset, 152 t->val_len, 153 out + o, &sub_len); 154 if (ret) 155 return ret; 156 o += sub_len; 157 } else { 158 if (o >= *out_len) 159 return WIND_ERR_OVERRUN; 160 out[o++] = in[i]; 161 162 } 163 } 164 } 165 *out_len = o; 166 return 0; 167} 168 169static void 170swap_char(uint32_t * a, uint32_t * b) 171{ 172 uint32_t t; 173 t = *a; 174 *a = *b; 175 *b = t; 176} 177 178/* Unicode 5.2.0 D109 Canonical Ordering for a sequence of code points 179 * that all have Canonical_Combining_Class > 0 */ 180static void 181canonical_reorder_sequence(uint32_t * a, size_t len) 182{ 183 size_t i, j; 184 185 if (len <= 1) 186 return; 187 188 for (i = 1; i < len; i++) { 189 for (j = i; 190 j > 0 && 191 _wind_combining_class(a[j]) < _wind_combining_class(a[j-1]); 192 j--) 193 swap_char(&a[j], &a[j-1]); 194 } 195} 196 197static void 198canonical_reorder(uint32_t *tmp, size_t tmp_len) 199{ 200 size_t i; 201 202 for (i = 0; i < tmp_len; ++i) { 203 int cc = _wind_combining_class(tmp[i]); 204 if (cc) { 205 size_t j; 206 for (j = i + 1; 207 j < tmp_len && _wind_combining_class(tmp[j]); 208 ++j) 209 ; 210 canonical_reorder_sequence(&tmp[i], j - i); 211 i = j; 212 } 213 } 214} 215 216static uint32_t 217find_composition(const uint32_t *in, unsigned in_len) 218{ 219 unsigned short canon_index = 0; 220 uint32_t cur; 221 unsigned n = 0; 222 223 cur = hangul_composition(in, in_len); 224 if (cur) 225 return cur; 226 227 do { 228 const struct canon_node *c = &_wind_canon_table[canon_index]; 229 unsigned i; 230 231 if (n % 5 == 0) { 232 cur = *in++; 233 if (in_len-- == 0) 234 return c->val; 235 } 236 237 i = cur >> 16; 238 if (i < c->next_start || i >= c->next_end) 239 canon_index = 0; 240 else 241 canon_index = 242 _wind_canon_next_table[c->next_offset + i - c->next_start]; 243 if (canon_index != 0) { 244 cur = (cur << 4) & 0xFFFFF; 245 ++n; 246 } 247 } while (canon_index != 0); 248 return 0; 249} 250 251static int 252combine(const uint32_t *in, size_t in_len, 253 uint32_t *out, size_t *out_len) 254{ 255 unsigned i; 256 int ostarter; 257 unsigned o = 0; 258 int old_cc; 259 260 for (i = 0; i < in_len;) { 261 while (i < in_len && _wind_combining_class(in[i]) != 0) { 262 out[o++] = in[i++]; 263 } 264 if (i < in_len) { 265 if (o >= *out_len) 266 return WIND_ERR_OVERRUN; 267 ostarter = o; 268 out[o++] = in[i++]; 269 old_cc = -1; 270 271 while (i < in_len) { 272 uint32_t comb; 273 uint32_t v[2]; 274 int cc; 275 276 v[0] = out[ostarter]; 277 v[1] = in[i]; 278 279 cc = _wind_combining_class(in[i]); 280 if (old_cc != cc && (comb = find_composition(v, 2))) { 281 out[ostarter] = comb; 282 } else if (cc == 0) { 283 break; 284 } else { 285 if (o >= *out_len) 286 return WIND_ERR_OVERRUN; 287 out[o++] = in[i]; 288 old_cc = cc; 289 } 290 ++i; 291 } 292 } 293 } 294 *out_len = o; 295 return 0; 296} 297 298int 299_wind_stringprep_normalize(const uint32_t *in, size_t in_len, 300 uint32_t *out, size_t *out_len) 301{ 302 size_t tmp_len; 303 uint32_t *tmp; 304 int ret; 305 306 if (in_len == 0) { 307 *out_len = 0; 308 return 0; 309 } 310 311 tmp_len = in_len * 4; 312 if (tmp_len < MAX_LENGTH_CANON) 313 tmp_len = MAX_LENGTH_CANON; 314 tmp = malloc(tmp_len * sizeof(uint32_t)); 315 if (tmp == NULL) 316 return ENOMEM; 317 318 ret = compat_decomp(in, in_len, tmp, &tmp_len); 319 if (ret) { 320 free(tmp); 321 return ret; 322 } 323 canonical_reorder(tmp, tmp_len); 324 ret = combine(tmp, tmp_len, out, out_len); 325 free(tmp); 326 return ret; 327} 328