1/*	$NetBSD: midna_domain.c,v 1.4 2020/05/25 23:47:14 christos Exp $	*/
2
3/*++
4/* NAME
5/*	midna_domain 3
6/* SUMMARY
7/*	ASCII/UTF-8 domain name conversion
8/* SYNOPSIS
9/*	#include <midna_domain.h>
10/*
11/*	int midna_domain_cache_size;
12/*	int midna_domain_transitional;
13/*
14/*	const char *midna_domain_to_ascii(
15/*	const char *name)
16/*
17/*	const char *midna_domain_to_utf8(
18/*	const char *name)
19/*
20/*	const char *midna_domain_suffix_to_ascii(
21/*	const char *name)
22/*
23/*	const char *midna_domain_suffix_to_utf8(
24/*	const char *name)
25/* AUXILIARY FUNCTIONS
26/*	void midna_domain_pre_chroot(void)
27/* DESCRIPTION
28/*	The functions in this module transform domain names from/to
29/*	ASCII and UTF-8 form. The result is cached to avoid repeated
30/*	conversion.
31/*
/*	This module builds on the ICU library implementation of the
/*	UTS #46 specification, using mostly default ICU library
/*	options because those are likely best tested: with case
/*	mapping, with normalization, with limited IDNA2003
/*	compatibility, without STD3 ASCII rules. Transitional
/*	processing is requested (UIDNA_DEFAULT) only when
/*	midna_domain_transitional is non-zero; otherwise the
/*	UIDNA_NONTRANSITIONAL_* options are passed to ICU.
37/*
/*	midna_domain_to_ascii() converts a UTF-8 or ASCII domain
/*	name to ASCII.  The result is a null pointer in case of
/*	error.  This function verifies that the result passes
/*	valid_hostname().
42/*
/*	midna_domain_to_utf8() converts a UTF-8 or ASCII domain
/*	name to UTF-8.  The result is a null pointer in case of
/*	error.  This function verifies that the result, after
/*	conversion to ASCII, passes valid_hostname().
47/*
48/*	midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
49/*	take a name that starts with '.' and otherwise perform the
50/*	same operations as midna_domain_to_ascii() and
51/*	midna_domain_to_utf8().
52/*
53/*	midna_domain_cache_size specifies the size of the conversion
54/*	result cache.  This value is used only once, upon the first
55/*	lookup request.
56/*
57/*	midna_domain_transitional enables transitional conversion
58/*	between UTF8 and ASCII labels.
59/*
60/*	midna_domain_pre_chroot() does some pre-chroot initialization.
61/* SEE ALSO
62/*	http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
63/*	msg(3) diagnostics interface
64/* DIAGNOSTICS
65/*	Fatal errors: memory allocation problem.
66/*	Warnings: conversion error or result validation error.
67/* LICENSE
68/* .ad
69/* .fi
70/*	The Secure Mailer license must be distributed with this software.
71/* AUTHOR(S)
72/*	Arnt Gulbrandsen
73/*
74/*	Wietse Venema
75/*	IBM T.J. Watson Research
76/*	P.O. Box 704
77/*	Yorktown Heights, NY 10598, USA
78/*
79/*	Wietse Venema
80/*	Google, Inc.
81/*	111 8th Avenue
82/*	New York, NY 10011, USA
83/*--*/
84
85 /*
86  * System library.
87  */
88#include <sys_defs.h>
89#include <string.h>
90#include <ctype.h>
91
92#ifndef NO_EAI
93#include <unicode/uidna.h>
94
95 /*
96  * Utility library.
97  */
98#include <mymalloc.h>
99#include <msg.h>
100#include <ctable.h>
101#include <stringops.h>
102#include <valid_hostname.h>
103#include <name_mask.h>
104#include <midna_domain.h>
105
106 /*
107  * Application-specific.
108  */
109#define DEF_MIDNA_CACHE_SIZE	256
110
111int     midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE;
112int     midna_domain_transitional = 0;
113static VSTRING *midna_domain_buf;	/* x.suffix */
114
115#define STR(x)	vstring_str(x)
116
117/* midna_domain_strerror - pick one for error reporting */
118
119static const char *midna_domain_strerror(UErrorCode error, int info_errors)
120{
121
122    /*
123     * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so
124     * we can't use #ifdef to dynamically determine which names exist.
125     */
126    static LONG_NAME_MASK uidna_errors[] = {
127	"UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL,
128	"UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG,
129	"UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG,
130	"UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN,
131	"UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN,
132	"UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4,
133	"UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK,
134	"UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED,
135	"UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE,
136	"UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT,
137	"UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL,
138	"UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI,
139	"UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ,
140	/* The above errors are defined with ICU 46 and later. */
141	0,
142    };
143
144    if (info_errors) {
145	return (str_long_name_mask_opt((VSTRING *) 0, "idna error",
146				       uidna_errors, info_errors,
147				       NAME_MASK_NUMBER | NAME_MASK_COMMA));
148    } else {
149	return u_errorName(error);
150    }
151}
152
153/* midna_domain_pre_chroot - pre-chroot initialization */
154
155void    midna_domain_pre_chroot(void)
156{
157    UErrorCode error = U_ZERO_ERROR;
158    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
159    UIDNA  *idna;
160
161    idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
162			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
163    if (U_FAILURE(error))
164	msg_warn("ICU library initialization failed: %s",
165		 midna_domain_strerror(error, info.errors));
166    uidna_close(idna);
167}
168
169/* midna_domain_to_ascii_create - convert domain to ASCII */
170
171static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
172{
173    static const char myname[] = "midna_domain_to_ascii_create";
174    char    buf[1024];			/* XXX */
175    UErrorCode error = U_ZERO_ERROR;
176    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
177    UIDNA  *idna;
178    int     anl;
179
180    /*
181     * Paranoia: do not expose uidna_*() to unfiltered network data.
182     */
183    if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
184	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
185		 myname, name, "malformed UTF-8");
186	return (0);
187    }
188
189    /*
190     * Perform the requested conversion.
191     */
192    idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
193			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
194    anl = uidna_nameToASCII_UTF8(idna,
195				 name, strlen(name),
196				 buf, sizeof(buf) - 1,
197				 &info,
198				 &error);
199    uidna_close(idna);
200
201    /*
202     * Paranoia: verify that the result passes valid_hostname(). A quick
203     * check shows that UTS46 ToASCII by default rejects inputs with labels
204     * that start or end in '-', with names or labels that are over-long, or
205     * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
206     * valid_hostname() on the output side just to be sure.
207     */
208    if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
209	buf[anl] = 0;				/* XXX */
210	if (!valid_hostname(buf, DONT_GRIPE)) {
211	    msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
212		     myname, name, "malformed ASCII label(s)");
213	    return (0);
214	}
215	return (mystrndup(buf, anl));
216    } else {
217	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
218		 myname, name, midna_domain_strerror(error, info.errors));
219	return (0);
220    }
221}
222
223/* midna_domain_to_utf8_create - convert domain to UTF8 */
224
225static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
226{
227    static const char myname[] = "midna_domain_to_utf8_create";
228    char    buf[1024];			/* XXX */
229    UErrorCode error = U_ZERO_ERROR;
230    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
231    UIDNA  *idna;
232    int     anl;
233
234    /*
235     * Paranoia: do not expose uidna_*() to unfiltered network data.
236     */
237    if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
238	msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
239		 myname, name, "malformed UTF-8");
240	return (0);
241    }
242
243    /*
244     * Perform the requested conversion.
245     */
246    idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
247			   : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error);
248    anl = uidna_nameToUnicodeUTF8(idna,
249				  name, strlen(name),
250				  buf, sizeof(buf) - 1,
251				  &info,
252				  &error);
253    uidna_close(idna);
254
255    /*
256     * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
257     * name or a name that contains an over-long NR-LDH label (and perhaps
258     * other invalid forms that are not covered in UTS 46, section 4.1). We
259     * rely on midna_domain_to_ascii() to validate the output.
260     */
261    if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
262	buf[anl] = 0;				/* XXX */
263	if (midna_domain_to_ascii(buf) == 0)
264	    return (0);
265	return (mystrndup(buf, anl));
266    } else {
267	msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
268		 myname, name, midna_domain_strerror(error, info.errors));
269	return (0);
270    }
271}
272
/* midna_domain_cache_free - cache element destructor */

static void midna_domain_cache_free(void *value, void *unused_context)
{

    /*
     * The create callbacks return a null pointer after a failed conversion,
     * so a cache entry may have no storage to release.
     */
    if (value == 0)
	return;
    myfree(value);
}
280
281/* midna_domain_to_ascii - convert name to ASCII */
282
283const char *midna_domain_to_ascii(const char *name)
284{
285    static CTABLE *midna_domain_to_ascii_cache = 0;
286
287    if (midna_domain_to_ascii_cache == 0)
288	midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
289					       midna_domain_to_ascii_create,
290						    midna_domain_cache_free,
291						    (void *) 0);
292    return (ctable_locate(midna_domain_to_ascii_cache, name));
293}
294
295/* midna_domain_to_utf8 - convert name to UTF8 */
296
297const char *midna_domain_to_utf8(const char *name)
298{
299    static CTABLE *midna_domain_to_utf8_cache = 0;
300
301    if (midna_domain_to_utf8_cache == 0)
302	midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
303						midna_domain_to_utf8_create,
304						   midna_domain_cache_free,
305						   (void *) 0);
306    return (ctable_locate(midna_domain_to_utf8_cache, name));
307}
308
309/* midna_domain_suffix_to_ascii - convert .name to ASCII */
310
311const char *midna_domain_suffix_to_ascii(const char *suffix)
312{
313    const char *cache_res;
314
315    /*
316     * If prepending x to .name causes the result to become too long, then
317     * the suffix is bad.
318     */
319    if (midna_domain_buf == 0)
320	midna_domain_buf = vstring_alloc(100);
321    vstring_sprintf(midna_domain_buf, "x%s", suffix);
322    if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
323	return (0);
324    else
325	return (cache_res + 1);
326}
327
328/* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
329
330const char *midna_domain_suffix_to_utf8(const char *name)
331{
332    const char *cache_res;
333
334    /*
335     * If prepending x to .name causes the result to become too long, then
336     * the suffix is bad.
337     */
338    if (midna_domain_buf == 0)
339	midna_domain_buf = vstring_alloc(100);
340    vstring_sprintf(midna_domain_buf, "x%s", name);
341    if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
342	return (0);
343    else
344	return (cache_res + 1);
345}
346
347#ifdef TEST
348
349 /*
350  * Test program - reads names from stdin, reports invalid names to stderr.
351  */
352#include <unistd.h>
353#include <stdlib.h>
354#include <locale.h>
355
356#include <stringops.h>			/* XXX util_utf8_enable */
357#include <vstring.h>
358#include <vstream.h>
359#include <vstring_vstream.h>
360#include <msg_vstream.h>
361
362int     main(int argc, char **argv)
363{
364    VSTRING *buffer = vstring_alloc(1);
365    const char *bp;
366    const char *ascii;
367    const char *utf8;
368
369    if (setlocale(LC_ALL, "C") == 0)
370	msg_fatal("setlocale(LC_ALL, C) failed: %m");
371
372    msg_vstream_init(argv[0], VSTREAM_ERR);
373    /* msg_verbose = 1; */
374    util_utf8_enable = 1;
375
376    if (geteuid() == 0) {
377	midna_domain_pre_chroot();
378	if (chroot(".") != 0)
379	    msg_fatal("chroot(\".\"): %m");
380    }
381    while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
382	bp = STR(buffer);
383	msg_info("> %s", bp);
384	while (ISSPACE(*bp))
385	    bp++;
386	if (*bp == '#' || *bp == 0)
387	    continue;
388	msg_info("unconditional conversions:");
389	utf8 = midna_domain_to_utf8(bp);
390	msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
391	ascii = midna_domain_to_ascii(bp);
392	msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
393	msg_info("conditional conversions:");
394	if (!allascii(bp)) {
395	    if (ascii != 0) {
396		utf8 = midna_domain_to_utf8(ascii);
397		msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
398			 bp, ascii, utf8 ? utf8 : "(error)");
399		if (utf8 != 0) {
400		    if (strcmp(utf8, bp) != 0)
401			msg_warn("\"%s\" != \"%s\"", bp, utf8);
402		}
403	    }
404	} else {
405	    if (utf8 != 0) {
406		ascii = midna_domain_to_ascii(utf8);
407		msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
408			 bp, utf8, ascii ? ascii : "(error)");
409		if (ascii != 0) {
410		    if (strcmp(ascii, bp) != 0)
411			msg_warn("\"%s\" != \"%s\"", bp, ascii);
412		}
413	    }
414	}
415    }
416    exit(0);
417}
418
419#endif					/* TEST */
420
421#endif					/* NO_EAI */
422