1/*	$NetBSD: valid_hostname.c,v 1.3 2023/12/23 20:30:46 christos Exp $	*/
2
3/*++
4/* NAME
5/*	valid_hostname 3
6/* SUMMARY
7/*	network name validation
8/* SYNOPSIS
9/*	#include <valid_hostname.h>
10/*
/*	int	valid_hostname(name, flags)
/*	const char *name;
/*	int	flags;
14/*
15/*	int	valid_hostaddr(addr, gripe)
16/*	const char *addr;
17/*	int	gripe;
18/*
19/*	int	valid_ipv4_hostaddr(addr, gripe)
20/*	const char *addr;
21/*	int	gripe;
22/*
23/*	int	valid_ipv6_hostaddr(addr, gripe)
24/*	const char *addr;
25/*	int	gripe;
26/*
27/*	int	valid_hostport(port, gripe)
28/*	const char *port;
29/*	int	gripe;
30/* DESCRIPTION
/*	valid_hostname() scrutinizes a hostname: the name should
/*	be no longer than VALID_HOSTNAME_LEN characters, should
/*	contain only letters, digits, underscores, dots and hyphens,
/*	no adjacent dots, no leading or trailing dots or hyphens,
/*	no labels longer than VALID_LABEL_LEN characters, and it
/*	should not be all numeric. When the DO_WILDCARD flag is
/*	specified, the first label may instead be a single "*".
37/*
38/*	valid_hostaddr() requires that the input is a valid string
39/*	representation of an IPv4 or IPv6 network address as
40/*	described next.
41/*
42/*	valid_ipv4_hostaddr() and valid_ipv6_hostaddr() implement
43/*	protocol-specific address syntax checks. A valid IPv4
44/*	address is in dotted-quad decimal form. A valid IPv6 address
45/*	has 16-bit hexadecimal fields separated by ":", and does not
46/*	include the RFC 2821 style "IPv6:" prefix.
47/*
48/*	These routines operate silently unless the gripe parameter
49/*	specifies a non-zero value. The macros DO_GRIPE and DONT_GRIPE
50/*	provide suitable constants.
51/*
52/*	valid_hostport() requires that the input is a valid string
53/*	representation of a TCP or UDP port number.
54/* BUGS
55/*	valid_hostmumble() does not guarantee that string lengths
56/*	fit the buffer sizes defined in myaddrinfo(3h).
57/* DIAGNOSTICS
58/*	All functions return zero if they disagree with the input.
59/* SEE ALSO
60/*	RFC 952, RFC 1123, RFC 1035, RFC 2373.
61/* LICENSE
62/* .ad
63/* .fi
64/*	The Secure Mailer license must be distributed with this software.
65/* AUTHOR(S)
66/*	Wietse Venema
67/*	IBM T.J. Watson Research
68/*	P.O. Box 704
69/*	Yorktown Heights, NY 10598, USA
70/*--*/
71
72/* System library. */
73
74#include <sys_defs.h>
75#include <string.h>
76#include <ctype.h>
77#include <stdlib.h>
78
79/* Utility library. */
80
81#include "msg.h"
82#include "mymalloc.h"
83#include "stringops.h"
84#include "valid_hostname.h"
85
86/* valid_hostname - screen out bad hostnames */
87
88int     valid_hostname(const char *name, int flags)
89{
90    const char *myname = "valid_hostname";
91    const char *cp;
92    int     label_length = 0;
93    int     label_count = 0;
94    int     non_numeric = 0;
95    int     ch;
96    int     gripe = flags & DO_GRIPE;
97
98    /*
99     * Trivial cases first.
100     */
101    if (*name == 0) {
102	if (gripe)
103	    msg_warn("%s: empty hostname", myname);
104	return (0);
105    }
106
107    /*
108     * Find bad characters or label lengths. Find adjacent delimiters.
109     */
110    for (cp = name; (ch = *(unsigned char *) cp) != 0; cp++) {
111	if (ISALNUM(ch) || ch == '_') {		/* grr.. */
112	    if (label_length == 0)
113		label_count++;
114	    label_length++;
115	    if (label_length > VALID_LABEL_LEN) {
116		if (gripe)
117		    msg_warn("%s: hostname label too long: %.100s", myname, name);
118		return (0);
119	    }
120	    if (!ISDIGIT(ch))
121		non_numeric = 1;
122	} else if ((flags & DO_WILDCARD) && ch == '*') {
123	    if (label_length || label_count || (cp[1] && cp[1] != '.')) {
124		if (gripe)
125		    msg_warn("%s: '*' can be the first label only: %.100s", myname, name);
126		return (0);
127	    }
128	    label_count++;
129	    label_length++;
130	    non_numeric = 1;
131	} else if (ch == '.') {
132	    if (label_length == 0 || cp[1] == 0) {
133		if (gripe)
134		    msg_warn("%s: misplaced delimiter: %.100s", myname, name);
135		return (0);
136	    }
137	    label_length = 0;
138	} else if (ch == '-') {
139	    non_numeric = 1;
140	    label_length++;
141	    if (label_length == 1 || cp[1] == 0 || cp[1] == '.') {
142		if (gripe)
143		    msg_warn("%s: misplaced hyphen: %.100s", myname, name);
144		return (0);
145	    }
146	}
147#ifdef SLOPPY_VALID_HOSTNAME
148	else if (ch == ':' && valid_ipv6_hostaddr(name, DONT_GRIPE)) {
149	    non_numeric = 0;
150	    break;
151	}
152#endif
153	else {
154	    if (gripe)
155		msg_warn("%s: invalid character %d(decimal): %.100s",
156			 myname, ch, name);
157	    return (0);
158	}
159    }
160
161    if (non_numeric == 0) {
162	if (gripe)
163	    msg_warn("%s: numeric hostname: %.100s", myname, name);
164#ifndef SLOPPY_VALID_HOSTNAME
165	return (0);
166#endif
167    }
168    if (cp - name > VALID_HOSTNAME_LEN) {
169	if (gripe)
170	    msg_warn("%s: bad length %d for %.100s...",
171		     myname, (int) (cp - name), name);
172	return (0);
173    }
174    return (1);
175}
176
/*
 * valid_hostaddr - verify numerical address syntax
 *
 * addr:  null-terminated address text.
 * gripe: when non-zero, rejections are logged with msg_warn().
 *
 * Returns the verdict of the protocol-specific checker (non-zero when the
 * syntax is acceptable), or 0 for an empty string.
 */

int     valid_hostaddr(const char *addr, int gripe)
{
    const char *myname = "valid_hostaddr";

    /* Nothing to classify: the empty string is rejected right here. */
    if (addr[0] == '\0') {
	if (gripe)
	    msg_warn("%s: empty address", myname);
	return (0);
    }

    /*
     * A ':' anywhere in the text selects the IPv6 checker; every other
     * input is handed to the IPv4 checker.
     */
    return (strchr(addr, ':') == 0 ?
	    valid_ipv4_hostaddr(addr, gripe) :
	    valid_ipv6_hostaddr(addr, gripe));
}
200
/*
 * valid_ipv4_hostaddr - test dotted quad string for correctness
 *
 * addr:  null-terminated address text.
 * gripe: when non-zero, rejections are logged with msg_warn().
 *
 * Returns 1 when the text is four dot-separated decimal octets, each no
 * larger than 255, and 0 otherwise. A zero first octet is accepted only
 * when the whole string consists of '0' and '.' characters.
 */

int     valid_ipv4_hostaddr(const char *addr, int gripe)
{
    const char *myname = "valid_ipv4_hostaddr";
    const unsigned char *p;
    int     octets = 0;			/* octets started so far */
    int     value = 0;			/* value of the octet being read */
    int     mid_octet = 0;		/* non-zero inside a run of digits */
    int     c;

#define BYTES_NEEDED	4

    /*
     * Hand-rolled scanner, so that no library number parser ever sees
     * unchecked input.
     *
     * valid_ipv6_hostaddr() calls this routine; to keep the call graph free
     * of cycles this routine must never call back into it.
     */
    for (p = (const unsigned char *) addr; (c = *p) != 0; p++) {

	/* Octet separator. */
	if (c == '.') {
	    /* Must follow a digit, and must not be the last character. */
	    if (!mid_octet || p[1] == 0) {
		if (gripe)
		    msg_warn("%s: misplaced dot: %.100s", myname, addr);
		return (0);
	    }
	    /* XXX 0.0.0.0 is fine, 0.1.2.3 is not. */
	    if (octets == 1 && value == 0 && addr[strspn(addr, "0.")] != 0) {
		if (gripe)
		    msg_warn("%s: bad initial octet value: %.100s", myname, addr);
		return (0);
	    }
	    mid_octet = 0;
	    continue;
	}

	/* Only digits remain as acceptable input. */
	if (!ISDIGIT(c)) {
	    if (gripe)
		msg_warn("%s: invalid character %d(decimal): %.100s",
			 myname, c, addr);
	    return (0);
	}

	/* First digit of a new octet: reset the accumulator and count it. */
	if (!mid_octet) {
	    mid_octet = 1;
	    value = 0;
	    octets += 1;
	}

	/* Checked after every digit, so the accumulator stays small. */
	value = value * 10 + (c - '0');
	if (value > 255) {
	    if (gripe)
		msg_warn("%s: invalid octet value: %.100s", myname, addr);
	    return (0);
	}
    }

    if (octets != BYTES_NEEDED) {
	if (gripe)
	    msg_warn("%s: invalid octet count: %.100s", myname, addr);
	return (0);
    }
    return (1);
}
262
/*
 * valid_ipv6_hostaddr - validate IPv6 address syntax
 *
 * addr:  null-terminated address text, without an "IPv6:" prefix.
 * gripe: when non-zero, rejections are logged with msg_warn().
 *
 * Returns 1 when the syntax is acceptable, 0 otherwise. Accepted forms are
 * eight hex fields (0:1:2:3:4:5:6:7), six hex fields followed by a dotted
 * quad (0:1:2:3:4:5:6a.6b.7c.7d), or a "::" compressed version of either.
 * An address that contains no "::" must supply every field.
 */

int     valid_ipv6_hostaddr(const char *addr, int gripe)
{
    const char *myname = "valid_ipv6_hostaddr";
    const unsigned char *cp = (const unsigned char *) addr;
    int     null_field = 0;		/* value of "field" at the "::", or 0 */
    int     field = 0;			/* number of ':' consumed so far */
    int     len = 0;			/* length of the latest hex run */
    size_t  span;

    /* Colon counts of the two uncompressed forms. */
    enum {
	FULL_FORM_COLONS = 7,		/* 0:1:2:3:4:5:6:7 */
	V4_TAIL_COLONS = 6		/* 0:1:2:3:4:5:6a.6b.7c.7d */
    };

    /*
     * FIX 200501 The IPv6 patch validated syntax with getaddrinfo(), but
     * the system library routines cannot be trusted to be robust against
     * malformed input, and the valid_hostmumble() routines exist to protect
     * Postfix against malformed information received from the network.
     *
     * Note: the character position is advanced inside the loop. The comments
     * on each case show why the loop cannot get stuck.
     */
    for (;;) {
	switch (*cp) {
	case 0:
	    /* Terminate the loop. */
	    if (field < 2) {
		if (gripe)
		    msg_warn("%s: too few `:' in IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    /* An empty last field is legal only as the tail of "::". */
	    if (len == 0 && null_field != field - 1) {
		if (gripe)
		    msg_warn("%s: bad null last field in IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    /* Without "::" nothing is elided: all eight fields are needed. */
	    if (null_field == 0 && field != FULL_FORM_COLONS) {
		if (gripe)
		    msg_warn("%s: too few fields in uncompressed IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    return (1);
	case '.':
	    /*
	     * Terminate the loop. The dotted quad stands in for the last two
	     * fields, so at most six ':' may precede it, and exactly six when
	     * there is no "::" to make up for missing fields.
	     */
	    if (field < 2 || field > V4_TAIL_COLONS
		|| (null_field == 0 && field != V4_TAIL_COLONS)) {
		if (gripe)
		    msg_warn("%s: malformed IPv4-in-IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    /* Back up over the digits already consumed as a hex run. */
	    /* NOT: valid_hostaddr(). Avoid recursion. */
	    return (valid_ipv4_hostaddr((const char *) cp - len, gripe));
	case ':':
	    /* Advance by exactly 1 character position or terminate. */
	    if (field == 0 && len == 0 && ISALNUM(cp[1])) {
		if (gripe)
		    msg_warn("%s: bad null first field in IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    field++;
	    if (field > FULL_FORM_COLONS) {
		if (gripe)
		    msg_warn("%s: too many `:' in IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    cp++;
	    len = 0;
	    /* A second ':' right here marks the one permitted "::". */
	    if (*cp == ':') {
		if (null_field > 0) {
		    if (gripe)
			msg_warn("%s: too many `::' in IPv6 address: %.100s",
				 myname, addr);
		    return (0);
		}
		null_field = field;
	    }
	    break;
	default:
	    /* Advance by at least 1 character position or terminate. */
	    span = strspn((const char *) cp, "0123456789abcdefABCDEF");
	    /* Range-check as size_t first, so narrowing to int is safe. */
	    if (span > 4) {
		if (gripe)
		    msg_warn("%s: malformed IPv6 address: %.100s",
			     myname, addr);
		return (0);
	    }
	    if (span == 0) {
		if (gripe)
		    msg_warn("%s: invalid character %d(decimal) in IPv6 address: %.100s",
			     myname, *cp, addr);
		return (0);
	    }
	    len = (int) span;
	    cp += len;
	    break;
	}
    }
}
359
/*
 * valid_hostport - validate numeric port
 *
 * str:   null-terminated port text.
 * gripe: when non-zero, rejections are logged with msg_warn().
 *
 * Returns 1 when the text passes the leading-zero, alldig() and range
 * (at most 65535) checks, 0 otherwise.
 */

int     valid_hostport(const char *str, int gripe)
{
    const char *myname = "valid_hostport";
    int     too_long;
    int     value = 0;

    /* "0" alone is fine, but no other number may start with '0'. */
    if (str[0] == '0' && str[1] != '\0') {
	if (gripe)
	    msg_warn("%s: leading zero in port number: %.100s", myname, str);
	return (0);
    }

    /* The digits-only decision is left to alldig(). */
    if (!alldig(str)) {
	if (gripe)
	    msg_warn("%s: non-numeric port number: %.100s", myname, str);
	return (0);
    }

    /*
     * Compare the length against that of "65535" before converting, so that
     * atoi() only ever sees a number that is known to be short.
     */
    too_long = (strlen(str) > sizeof("65535") - 1);
    if (!too_long)
	value = atoi(str);
    if (too_long || value > 65535 || value < 0) {
	if (gripe)
	    msg_warn("%s: out-of-range port number: %.100s", myname, str);
	return (0);
    }
    return (1);
}
385
386#ifdef TEST
387
388 /*
389  * Test program - reads hostnames from stdin, reports invalid hostnames to
390  * stderr.
391  */
392#include <stdlib.h>
393
394#include "vstring.h"
395#include "vstream.h"
396#include "vstring_vstream.h"
397#include "msg_vstream.h"
398
399int     main(int unused_argc, char **argv)
400{
401    VSTRING *buffer = vstring_alloc(1);
402
403    msg_vstream_init(argv[0], VSTREAM_ERR);
404    msg_verbose = 1;
405
406    while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
407	msg_info("testing: \"%s\"", vstring_str(buffer));
408	valid_hostname(vstring_str(buffer), DO_GRIPE);
409	valid_hostaddr(vstring_str(buffer), DO_GRIPE);
410    }
411    exit(0);
412}
413
414#endif
415