1/* $NetBSD: midna_domain.c,v 1.4 2020/05/25 23:47:14 christos Exp $ */ 2 3/*++ 4/* NAME 5/* midna_domain 3 6/* SUMMARY 7/* ASCII/UTF-8 domain name conversion 8/* SYNOPSIS 9/* #include <midna_domain.h> 10/* 11/* int midna_domain_cache_size; 12/* int midna_domain_transitional; 13/* 14/* const char *midna_domain_to_ascii( 15/* const char *name) 16/* 17/* const char *midna_domain_to_utf8( 18/* const char *name) 19/* 20/* const char *midna_domain_suffix_to_ascii( 21/* const char *name) 22/* 23/* const char *midna_domain_suffix_to_utf8( 24/* const char *name) 25/* AUXILIARY FUNCTIONS 26/* void midna_domain_pre_chroot(void) 27/* DESCRIPTION 28/* The functions in this module transform domain names from/to 29/* ASCII and UTF-8 form. The result is cached to avoid repeated 30/* conversion. 31/* 32/* This module builds on the ICU library implementation of the 33/* UTS #46 specification, using default ICU library options 34/* because those are likely best tested: with transitional 35/* processing, with case mapping, with normalization, with 36/* limited IDNA2003 compatibility, without STD3 ASCII rules. 37/* 38/* midna_domain_to_ascii() converts an UTF-8 or ASCII domain 39/* name to ASCII. The result is a null pointer in case of 40/* error. This function verifies that the result passes 41/* valid_hostname(). 42/* 43/* midna_domain_to_utf8() converts an UTF-8 or ASCII domain 44/* name to UTF-8. The result is a null pointer in case of 45/* error. This function verifies that the result, after 46/* conversion to ASCII, passes valid_hostname(). 47/* 48/* midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8() 49/* take a name that starts with '.' and otherwise perform the 50/* same operations as midna_domain_to_ascii() and 51/* midna_domain_to_utf8(). 52/* 53/* midna_domain_cache_size specifies the size of the conversion 54/* result cache. This value is used only once, upon the first 55/* lookup request. 56/* 57/* midna_domain_transitional enables transitional conversion 58/* between UTF8 and ASCII labels. 59/* 60/* midna_domain_pre_chroot() does some pre-chroot initialization. 61/* SEE ALSO 62/* http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing 63/* msg(3) diagnostics interface 64/* DIAGNOSTICS 65/* Fatal errors: memory allocation problem. 66/* Warnings: conversion error or result validation error. 67/* LICENSE 68/* .ad 69/* .fi 70/* The Secure Mailer license must be distributed with this software. 71/* AUTHOR(S) 72/* Arnt Gulbrandsen 73/* 74/* Wietse Venema 75/* IBM T.J. Watson Research 76/* P.O. Box 704 77/* Yorktown Heights, NY 10598, USA 78/* 79/* Wietse Venema 80/* Google, Inc. 81/* 111 8th Avenue 82/* New York, NY 10011, USA 83/*--*/ 84 85 /* 86 * System library. 87 */ 88#include <sys_defs.h> 89#include <string.h> 90#include <ctype.h> 91 92#ifndef NO_EAI 93#include <unicode/uidna.h> 94 95 /* 96 * Utility library. 97 */ 98#include <mymalloc.h> 99#include <msg.h> 100#include <ctable.h> 101#include <stringops.h> 102#include <valid_hostname.h> 103#include <name_mask.h> 104#include <midna_domain.h> 105 106 /* 107 * Application-specific. 108 */ 109#define DEF_MIDNA_CACHE_SIZE 256 110 111int midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE; 112int midna_domain_transitional = 0; 113static VSTRING *midna_domain_buf; /* x.suffix */ 114 115#define STR(x) vstring_str(x) 116 117/* midna_domain_strerror - pick one for error reporting */ 118 119static const char *midna_domain_strerror(UErrorCode error, int info_errors) 120{ 121 122 /* 123 * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so 124 * we can't use #ifdef to dynamically determine which names exist. 125 */ 126 static LONG_NAME_MASK uidna_errors[] = { 127 "UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, 128 "UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, 129 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, 130 "UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, 131 "UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, 132 "UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, 133 "UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, 134 "UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, 135 "UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, 136 "UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, 137 "UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, 138 "UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, 139 "UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, 140 /* The above errors are defined with ICU 46 and later. */ 141 0, 142 }; 143 144 if (info_errors) { 145 return (str_long_name_mask_opt((VSTRING *) 0, "idna error", 146 uidna_errors, info_errors, 147 NAME_MASK_NUMBER | NAME_MASK_COMMA)); 148 } else { 149 return u_errorName(error); 150 } 151} 152 153/* midna_domain_pre_chroot - pre-chroot initialization */ 154 155void midna_domain_pre_chroot(void) 156{ 157 UErrorCode error = U_ZERO_ERROR; 158 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 159 UIDNA *idna; 160 161 idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT 162 : UIDNA_NONTRANSITIONAL_TO_ASCII, &error); 163 if (U_FAILURE(error)) 164 msg_warn("ICU library initialization failed: %s", 165 midna_domain_strerror(error, info.errors)); 166 uidna_close(idna); 167} 168 169/* midna_domain_to_ascii_create - convert domain to ASCII */ 170 171static void *midna_domain_to_ascii_create(const char *name, void *unused_context) 172{ 173 static const char myname[] = "midna_domain_to_ascii_create"; 174 char buf[1024]; /* XXX */ 175 UErrorCode error = U_ZERO_ERROR; 176 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 177 UIDNA *idna; 178 int anl; 179 180 /* 181 * Paranoia: do not expose uidna_*() to unfiltered network data. 182 */ 183 if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { 184 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 185 myname, name, "malformed UTF-8"); 186 return (0); 187 } 188 189 /* 190 * Perform the requested conversion. 191 */ 192 idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT 193 : UIDNA_NONTRANSITIONAL_TO_ASCII, &error); 194 anl = uidna_nameToASCII_UTF8(idna, 195 name, strlen(name), 196 buf, sizeof(buf) - 1, 197 &info, 198 &error); 199 uidna_close(idna); 200 201 /* 202 * Paranoia: verify that the result passes valid_hostname(). A quick 203 * check shows that UTS46 ToASCII by default rejects inputs with labels 204 * that start or end in '-', with names or labels that are over-long, or 205 * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on 206 * valid_hostname() on the output side just to be sure. 207 */ 208 if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { 209 buf[anl] = 0; /* XXX */ 210 if (!valid_hostname(buf, DONT_GRIPE)) { 211 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 212 myname, name, "malformed ASCII label(s)"); 213 return (0); 214 } 215 return (mystrndup(buf, anl)); 216 } else { 217 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 218 myname, name, midna_domain_strerror(error, info.errors)); 219 return (0); 220 } 221} 222 223/* midna_domain_to_utf8_create - convert domain to UTF8 */ 224 225static void *midna_domain_to_utf8_create(const char *name, void *unused_context) 226{ 227 static const char myname[] = "midna_domain_to_utf8_create"; 228 char buf[1024]; /* XXX */ 229 UErrorCode error = U_ZERO_ERROR; 230 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 231 UIDNA *idna; 232 int anl; 233 234 /* 235 * Paranoia: do not expose uidna_*() to unfiltered network data. 236 */ 237 if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { 238 msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s", 239 myname, name, "malformed UTF-8"); 240 return (0); 241 } 242 243 /* 244 * Perform the requested conversion. 245 */ 246 idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT 247 : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error); 248 anl = uidna_nameToUnicodeUTF8(idna, 249 name, strlen(name), 250 buf, sizeof(buf) - 1, 251 &info, 252 &error); 253 uidna_close(idna); 254 255 /* 256 * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long 257 * name or a name that contains an over-long NR-LDH label (and perhaps 258 * other invalid forms that are not covered in UTS 46, section 4.1). We 259 * rely on midna_domain_to_ascii() to validate the output. 260 */ 261 if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { 262 buf[anl] = 0; /* XXX */ 263 if (midna_domain_to_ascii(buf) == 0) 264 return (0); 265 return (mystrndup(buf, anl)); 266 } else { 267 msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s", 268 myname, name, midna_domain_strerror(error, info.errors)); 269 return (0); 270 } 271} 272 273/* midna_domain_cache_free - cache element destructor */ 274 275static void midna_domain_cache_free(void *value, void *unused_context) 276{ 277 if (value) 278 myfree(value); 279} 280 281/* midna_domain_to_ascii - convert name to ASCII */ 282 283const char *midna_domain_to_ascii(const char *name) 284{ 285 static CTABLE *midna_domain_to_ascii_cache = 0; 286 287 if (midna_domain_to_ascii_cache == 0) 288 midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size, 289 midna_domain_to_ascii_create, 290 midna_domain_cache_free, 291 (void *) 0); 292 return (ctable_locate(midna_domain_to_ascii_cache, name)); 293} 294 295/* midna_domain_to_utf8 - convert name to UTF8 */ 296 297const char *midna_domain_to_utf8(const char *name) 298{ 299 static CTABLE *midna_domain_to_utf8_cache = 0; 300 301 if (midna_domain_to_utf8_cache == 0) 302 midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size, 303 midna_domain_to_utf8_create, 304 midna_domain_cache_free, 305 (void *) 0); 306 return (ctable_locate(midna_domain_to_utf8_cache, name)); 307} 308 309/* midna_domain_suffix_to_ascii - convert .name to ASCII */ 310 311const char *midna_domain_suffix_to_ascii(const char *suffix) 312{ 313 const char *cache_res; 314 315 /* 316 * If prepending x to .name causes the result to become too long, then 317 * the suffix is bad. 318 */ 319 if (midna_domain_buf == 0) 320 midna_domain_buf = vstring_alloc(100); 321 vstring_sprintf(midna_domain_buf, "x%s", suffix); 322 if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0) 323 return (0); 324 else 325 return (cache_res + 1); 326} 327 328/* midna_domain_suffix_to_utf8 - convert .name to UTF8 */ 329 330const char *midna_domain_suffix_to_utf8(const char *name) 331{ 332 const char *cache_res; 333 334 /* 335 * If prepending x to .name causes the result to become too long, then 336 * the suffix is bad. 337 */ 338 if (midna_domain_buf == 0) 339 midna_domain_buf = vstring_alloc(100); 340 vstring_sprintf(midna_domain_buf, "x%s", name); 341 if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0) 342 return (0); 343 else 344 return (cache_res + 1); 345} 346 347#ifdef TEST 348 349 /* 350 * Test program - reads names from stdin, reports invalid names to stderr. 351 */ 352#include <unistd.h> 353#include <stdlib.h> 354#include <locale.h> 355 356#include <stringops.h> /* XXX util_utf8_enable */ 357#include <vstring.h> 358#include <vstream.h> 359#include <vstring_vstream.h> 360#include <msg_vstream.h> 361 362int main(int argc, char **argv) 363{ 364 VSTRING *buffer = vstring_alloc(1); 365 const char *bp; 366 const char *ascii; 367 const char *utf8; 368 369 if (setlocale(LC_ALL, "C") == 0) 370 msg_fatal("setlocale(LC_ALL, C) failed: %m"); 371 372 msg_vstream_init(argv[0], VSTREAM_ERR); 373 /* msg_verbose = 1; */ 374 util_utf8_enable = 1; 375 376 if (geteuid() == 0) { 377 midna_domain_pre_chroot(); 378 if (chroot(".") != 0) 379 msg_fatal("chroot(\".\"): %m"); 380 } 381 while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { 382 bp = STR(buffer); 383 msg_info("> %s", bp); 384 while (ISSPACE(*bp)) 385 bp++; 386 if (*bp == '#' || *bp == 0) 387 continue; 388 msg_info("unconditional conversions:"); 389 utf8 = midna_domain_to_utf8(bp); 390 msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)"); 391 ascii = midna_domain_to_ascii(bp); 392 msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)"); 393 msg_info("conditional conversions:"); 394 if (!allascii(bp)) { 395 if (ascii != 0) { 396 utf8 = midna_domain_to_utf8(ascii); 397 msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"", 398 bp, ascii, utf8 ? utf8 : "(error)"); 399 if (utf8 != 0) { 400 if (strcmp(utf8, bp) != 0) 401 msg_warn("\"%s\" != \"%s\"", bp, utf8); 402 } 403 } 404 } else { 405 if (utf8 != 0) { 406 ascii = midna_domain_to_ascii(utf8); 407 msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"", 408 bp, utf8, ascii ? ascii : "(error)"); 409 if (ascii != 0) { 410 if (strcmp(ascii, bp) != 0) 411 msg_warn("\"%s\" != \"%s\"", bp, ascii); 412 } 413 } 414 } 415 } 416 exit(0); 417} 418 419#endif /* TEST */ 420 421#endif /* NO_EAI */ 422