1/*	$NetBSD$	*/
2
3/*++
4/* NAME
5/*	ip_match 3
6/* SUMMARY
7/*	IP address pattern matching
8/* SYNOPSIS
9/*	#include <ip_match.h>
10/*
11/*	char	*ip_match_parse(byte_codes, pattern)
12/*	VSTRING	*byte_codes;
13/*	char	*pattern;
14/*
15/*	char	*ip_match_save(byte_codes)
16/*	const VSTRING *byte_codes;
17/*
18/*	int	ip_match_execute(byte_codes, addr_bytes)
19/*	cost char *byte_codes;
20/*	const char *addr_bytes;
21/*
22/*	char	*ip_match_dump(printable, byte_codes)
23/*	VSTRING	*printable;
24/*	const char *byte_codes;
25/* DESCRIPTION
26/*	This module supports IP address pattern matching. See below
27/*	for a description of the supported address pattern syntax.
28/*
29/*	This implementation aims to minimize the cost of encoding
30/*	the pattern in internal form, while still providing good
31/*	matching performance in the typical case.   The first byte
32/*	of an encoded pattern specifies the expected address family
33/*	(for example, AF_INET); other details of the encoding are
34/*	private and are subject to change.
35/*
36/*	ip_match_parse() converts the user-specified pattern to
37/*	internal form. The result value is a null pointer in case
38/*	of success, or a pointer into the byte_codes buffer with a
39/*	detailed problem description.
40/*
41/*	ip_match_save() saves the result from ip_match_parse() for
42/*	longer-term usage. The result should be passed to myfree().
43/*
44/*	ip_match_execute() matches a binary network in addr_bytes
45/*	against a byte-code array in byte_codes. It is an error to
46/*	use different address families for the byte_codes and addr_bytes
47/*	arguments (the first byte-code value contains the expected
48/*	address family).  The result is non-zero in case of success.
49/*
50/*	ip_match_dump() produces an ASCII dump of a byte-code array.
51/*	The dump is supposed to be identical to the input pattern
52/*	modulo upper/lower case or leading nulls with IPv6).  This
53/*	function is primarily a debugging aid.
54/*
55/*	Arguments
56/* .IP addr_bytes
57/*	Binary network address in network-byte order.
58/* .IP byte_codes
59/*	Byte-code array produced by ip_match_parse().
60/* .IP pattern
61/*	Human-readable address pattern.
62/* .IP printable
63/*	storage for ASCII dump of a byte-code array.
64/* IPV4 PATTERN SYNTAX
65/* .ad
66/* .fi
67/*	An IPv4 address pattern has four fields separated by ".".
68/*	Each field is either a decimal number, or a sequence inside
69/*	"[]" that contains one or more ";"-separated decimal
70/*	numbers or number..number ranges.
71/*
72/*	Examples of patterns are 1.2.3.4 (matches itself, as one
73/*	would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4,
74/*	1.2.3.6, 1.2.3.7, 1.2.3.8).
75/*
76/*	Thus, any pattern field can be a sequence inside "[]", but
77/*	a "[]" sequence cannot span multiple address fields, and
78/*	a pattern field cannot contain both a number and a "[]"
79/*	sequence at the same time.
80/*
81/*	This means that the pattern 1.2.[3.4] is not valid (the
82/*	sequence [3.4] cannot span two address fields) and the
83/*	pattern 1.2.3.3[6..9] is also not valid (the last field
84/*	cannot be both number 3 and sequence [6..9] at the same
85/*	time).
86/*
87/*	The syntax for IPv4 patterns is as follows:
88/*
89/* .in +5
90/*	v4pattern = v4field "." v4field "." v4field "." v4field
91/* .br
92/*	v4field = v4octet | "[" v4sequence "]"
93/* .br
94/*	v4octet = any decimal number in the range 0 through 255
95/* .br
96/*	v4sequence = v4seq_member | v4sequence ";" v4seq_member
97/* .br
98/*	v4seq_member = v4octet | v4octet ".." v4octet
99/* .in
100/* LICENSE
101/* .ad
102/* .fi
103/*	The Secure Mailer license must be distributed with this
104/*	software.
105/* AUTHOR(S)
106/*	Wietse Venema
107/*	IBM T.J. Watson Research
108/*	P.O. Box 704
109/*	Yorktown Heights, NY 10598, USA
110/*--*/
111
112/* System library. */
113
114#include <sys_defs.h>
115#include <sys/socket.h>
116#include <ctype.h>
117#include <string.h>
118
119/* Utility library. */
120
121#include <msg.h>
122#include <mymalloc.h>
123#include <vstring.h>
124#include <ip_match.h>
125
126 /*
127  * Token values. The in-band values are also used as byte-code values.
128  */
129#define IP_MATCH_CODE_OPEN	'['	/* in-band */
130#define IP_MATCH_CODE_CLOSE	']'	/* in-band */
131#define IP_MATCH_CODE_OVAL	'N'	/* in-band */
132#define IP_MATCH_CODE_RANGE	'R'	/* in-band */
133#define IP_MATCH_CODE_EOF	'\0'	/* in-band */
134#define IP_MATCH_CODE_ERR	256	/* out-of-band */
135
136 /*
137  * SLMs.
138  */
139#define STR	vstring_str
140#define LEN	VSTRING_LEN
141
142/* ip_match_save - make longer-term copy of byte code */
143
144char   *ip_match_save(const VSTRING *byte_codes)
145{
146    char   *dst;
147
148    dst = mymalloc(LEN(byte_codes));
149    return (memcpy(dst, STR(byte_codes), LEN(byte_codes)));
150}
151
152/* ip_match_dump - byte-code pretty printer */
153
154char   *ip_match_dump(VSTRING *printable, const char *byte_codes)
155{
156    const char *myname = "ip_match_dump";
157    const unsigned char *bp;
158    int     octet_count = 0;
159    int     ch;
160
161    /*
162     * Sanity check. Use different dumping loops for AF_INET and AF_INET6.
163     */
164    if (*byte_codes != AF_INET)
165	msg_panic("%s: malformed byte-code header", myname);
166
167    /*
168     * Pretty-print and sanity-check the byte codes. Note: the loops in this
169     * code have no auto-increment at the end of the iteration. Instead, each
170     * byte-code handler bumps the byte-code pointer appropriately.
171     */
172    VSTRING_RESET(printable);
173    bp = (const unsigned char *) byte_codes + 1;
174    for (;;) {
175
176	/*
177	 * Simple numeric field.
178	 */
179	if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
180	    vstring_sprintf_append(printable, "%d", *bp);
181	    bp += 1;
182	}
183
184	/*
185	 * Wild-card numeric field.
186	 */
187	else if (ch == IP_MATCH_CODE_OPEN) {
188	    vstring_sprintf_append(printable, "[");
189	    for (;;) {
190		/* Numeric range. */
191		if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
192		    vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]);
193		    bp += 2;
194		}
195		/* Number. */
196		else if (ch == IP_MATCH_CODE_OVAL) {
197		    vstring_sprintf_append(printable, "%d", *bp);
198		    bp += 1;
199		}
200		/* End-of-wildcard. */
201		else if (ch == IP_MATCH_CODE_CLOSE) {
202		    break;
203		}
204		/* Corruption. */
205		else {
206		    msg_panic("%s: unexpected byte code (decimal %d) "
207			      "after \"%s\"", myname, ch, STR(printable));
208		}
209		/* Output the wild-card field separator and repeat the loop. */
210		if (*bp != IP_MATCH_CODE_CLOSE)
211		    vstring_sprintf_append(printable, ";");
212	    }
213	    vstring_sprintf_append(printable, "]");
214	}
215
216	/*
217	 * Corruption.
218	 */
219	else {
220	    msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
221		      myname, ch, STR(printable));
222	}
223
224	/*
225	 * Require four octets, not one more, not one less.
226	 */
227	if (++octet_count == 4) {
228	    if (*bp != 0)
229		msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
230			  myname, ch, STR(printable));
231	    return (STR(printable));
232	}
233	if (*bp == 0)
234	    msg_panic("%s: truncated byte code after \"%s\"",
235		      myname, STR(printable));
236
237	/*
238	 * Output the address field separator and repeat the loop.
239	 */
240	vstring_sprintf_append(printable, ".");
241    }
242}
243
244/* ip_match_print_code_prefix - printable byte-code prefix */
245
246static char *ip_match_print_code_prefix(const char *byte_codes, size_t len)
247{
248    static VSTRING *printable = 0;
249    const char *fmt;
250    const char *bp;
251
252    /*
253     * This is primarily for emergency debugging so we don't care about
254     * non-reentrancy.
255     */
256    if (printable == 0)
257	printable = vstring_alloc(100);
258    else
259	VSTRING_RESET(printable);
260
261    /*
262     * Use decimal for IPv4 and hexadecimal otherwise, so that address octet
263     * values are easy to recognize.
264     */
265    fmt = (*byte_codes == AF_INET ? "%d " : "%02x ");
266    for (bp = byte_codes; bp < byte_codes + len; bp++)
267	vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp);
268
269    return (STR(printable));
270}
271
272/* ip_match_execute - byte-code matching engine */
273
274int     ip_match_execute(const char *byte_codes, const char *addr_bytes)
275{
276    const char *myname = "ip_match_execute";
277    const unsigned char *bp;
278    const unsigned char *ap;
279    int     octet_count = 0;
280    int     ch;
281    int     matched;
282
283    /*
284     * Sanity check. Use different execute loops for AF_INET and AF_INET6.
285     */
286    if (*byte_codes != AF_INET)
287	msg_panic("%s: malformed byte-code header (decimal %d)",
288		  myname, *(const unsigned char *) byte_codes);
289
290    /*
291     * Match the address bytes against the byte codes. Avoid problems with
292     * (char -> int) sign extension on architectures with signed characters.
293     */
294    bp = (const unsigned char *) byte_codes + 1;
295    ap = (const unsigned char *) addr_bytes;
296
297    for (octet_count = 0; octet_count < 4; octet_count++, ap++) {
298
299	/*
300	 * Simple numeric field.
301	 */
302	if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
303	    if (*ap == *bp)
304		bp += 1;
305	    else
306		return (0);
307	}
308
309	/*
310	 * Wild-card numeric field.
311	 */
312	else if (ch == IP_MATCH_CODE_OPEN) {
313	    matched = 0;
314	    for (;;) {
315		/* Numeric range. */
316		if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
317		    if (!matched)
318			matched = (*ap >= bp[0] && *ap <= bp[1]);
319		    bp += 2;
320		}
321		/* Number. */
322		else if (ch == IP_MATCH_CODE_OVAL) {
323		    if (!matched)
324			matched = (*ap == *bp);
325		    bp += 1;
326		}
327		/* End-of-wildcard. */
328		else if (ch == IP_MATCH_CODE_CLOSE) {
329		    break;
330		}
331		/* Corruption. */
332		else {
333		    size_t  len = (const char *) bp - byte_codes - 1;
334
335		    msg_panic("%s: unexpected byte code (decimal %d) "
336			      "after \"%s\"", myname, ch,
337			      ip_match_print_code_prefix(byte_codes, len));
338		}
339	    }
340	    if (matched == 0)
341		return (0);
342	}
343
344	/*
345	 * Corruption.
346	 */
347	else {
348	    size_t  len = (const char *) bp - byte_codes - 1;
349
350	    msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
351		   myname, ch, ip_match_print_code_prefix(byte_codes, len));
352	}
353    }
354    return (1);
355}
356
357/* ip_match_next_token - carve out the next token from input pattern */
358
359static int ip_match_next_token(char **pstart, char **psaved_start, int *poval)
360{
361    unsigned char *cp;
362    int     oval;			/* octet value */
363    int     type;			/* token value */
364
365    /*
366     * Return a literal, error, or EOF token. Update the read pointer to the
367     * start of the next token or leave it at the string terminator.
368     */
369#define IP_MATCH_RETURN_TOK(next, type) \
370    do { *pstart = (char *) (next); return (type); } while (0)
371
372    /*
373     * Return a token that contains an IPv4 address octet value.
374     */
375#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \
376	*poval = (oval); IP_MATCH_RETURN_TOK((next), type); \
377    } while (0)
378
379    /*
380     * Light-weight tokenizer. Each result is an IPv4 address octet value, a
381     * literal character value, error, or EOF.
382     */
383    *psaved_start = *pstart;
384    cp = (unsigned char *) *pstart;
385    if (ISDIGIT(*cp)) {
386	oval = *cp - '0';
387	type = IP_MATCH_CODE_OVAL;
388	for (cp += 1; ISDIGIT(*cp); cp++) {
389	    oval *= 10;
390	    oval += *cp - '0';
391	    if (oval > 255)
392		type = IP_MATCH_CODE_ERR;
393	}
394	IP_MATCH_RETURN_TOK_VAL(cp, type, oval);
395    } else {
396	IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp);
397    }
398}
399
400/* ipmatch_print_parse_error - formatted parsing error, with context */
401
402static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply,
403						               char *start,
404						               char *here,
405						               char *next,
406						        const char *fmt,...)
407{
408    va_list ap;
409    int     start_width;
410    int     here_width;
411
412    /*
413     * Format the error type.
414     */
415    va_start(ap, fmt);
416    vstring_vsprintf(reply, fmt, ap);
417    va_end(ap);
418
419    /*
420     * Format the error context. The syntax is complex enough that it is
421     * worth the effort to precisely indicate what input is in error.
422     *
423     * XXX Workaround for %.*s to avoid output when a zero width is specified.
424     */
425    if (start != 0) {
426	start_width = here - start;
427	here_width = next - here;
428	vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"",
429			       start_width, start_width == 0 ? "" : start,
430			     here_width, here_width == 0 ? "" : here, next);
431    }
432}
433
434/* ip_match_parse - parse an entire wild-card address pattern */
435
436char   *ip_match_parse(VSTRING *byte_codes, char *pattern)
437{
438    int     octet_count;
439    char   *saved_cp;
440    char   *cp;
441    int     token_type;
442    int     look_ahead;
443    int     oval;
444    int     saved_oval;
445
446    /*
447     * Simplify this if we change to {} for wildcard notation.
448     */
449#define FIND_TERMINATOR(start, cp) do { \
450	int _level = 0; \
451	for (cp = (start) ; *cp; cp++) { \
452	    if (*cp == '[') _level++; \
453	    if (*cp != ']') continue; \
454	    if (--_level == 0) break; \
455	} \
456    } while (0)
457
458    /*
459     * Strip [] around the entire pattern.
460     */
461    if (*pattern == '[') {
462	FIND_TERMINATOR(pattern, cp);
463	if (cp[0] == 0) {
464	    vstring_sprintf(byte_codes, "missing \"]\" character");
465	    return (STR(byte_codes));
466	}
467	if (cp[1] == 0) {
468	    *cp = 0;
469	    pattern += 1;
470	}
471    }
472
473    /*
474     * Sanity check. In this case we can't show any error context.
475     */
476    if (*pattern == 0) {
477	vstring_sprintf(byte_codes, "empty address pattern");
478	return (STR(byte_codes));
479    }
480
481    /*
482     * Simple parser with on-the-fly encoding. For now, IPv4 support only.
483     * Use different parser loops for IPv4 and IPv6.
484     */
485    VSTRING_RESET(byte_codes);
486    VSTRING_ADDCH(byte_codes, AF_INET);
487    octet_count = 0;
488    cp = pattern;
489
490    /*
491     * Require four address fields separated by ".", each field containing a
492     * numeric octet value or a sequence inside []. The loop head has no test
493     * and does not step the loop variable. The tokenizer advances the loop
494     * variable, and the loop termination logic is inside the loop.
495     */
496    for (;;) {
497	switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) {
498
499	    /*
500	     * Numeric address field.
501	     */
502	case IP_MATCH_CODE_OVAL:
503	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
504	    VSTRING_ADDCH(byte_codes, oval);
505	    break;
506
507	    /*
508	     * Wild-card address field.
509	     */
510	case IP_MATCH_CODE_OPEN:
511	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN);
512	    /* Require ";"-separated numbers or numeric ranges. */
513	    for (;;) {
514		token_type = ip_match_next_token(&cp, &saved_cp, &oval);
515		if (token_type == IP_MATCH_CODE_OVAL) {
516		    saved_oval = oval;
517		    look_ahead = ip_match_next_token(&cp, &saved_cp, &oval);
518		    /* Numeric range. */
519		    if (look_ahead == '.') {
520			/* Brute-force parsing. */
521			if (ip_match_next_token(&cp, &saved_cp, &oval) == '.'
522			    && ip_match_next_token(&cp, &saved_cp, &oval)
523			    == IP_MATCH_CODE_OVAL
524			    && saved_oval <= oval) {
525			    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE);
526			    VSTRING_ADDCH(byte_codes, saved_oval);
527			    VSTRING_ADDCH(byte_codes, oval);
528			    look_ahead =
529				ip_match_next_token(&cp, &saved_cp, &oval);
530			} else {
531			    ipmatch_print_parse_error(byte_codes, pattern,
532						      saved_cp, cp,
533						      "numeric range error");
534			    return (STR(byte_codes));
535			}
536		    }
537		    /* Single number. */
538		    else {
539			VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
540			VSTRING_ADDCH(byte_codes, saved_oval);
541		    }
542		    /* Require ";" or end-of-wildcard. */
543		    token_type = look_ahead;
544		    if (token_type == ';') {
545			continue;
546		    } else if (token_type == IP_MATCH_CODE_CLOSE) {
547			break;
548		    } else {
549			ipmatch_print_parse_error(byte_codes, pattern,
550						  saved_cp, cp,
551						  "need \";\" or \"%c\"",
552						  IP_MATCH_CODE_CLOSE);
553			return (STR(byte_codes));
554		    }
555		} else {
556		    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
557					      "need decimal number 0..255");
558		    return (STR(byte_codes));
559		}
560	    }
561	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE);
562	    break;
563
564	    /*
565	     * Invalid field.
566	     */
567	default:
568	    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
569				      "need decimal number 0..255 or \"%c\"",
570				      IP_MATCH_CODE_OPEN);
571	    return (STR(byte_codes));
572	}
573	octet_count += 1;
574
575	/*
576	 * Require four address fields. Not one more, not one less.
577	 */
578	if (octet_count == 4) {
579	    if (*cp != 0) {
580		(void) ip_match_next_token(&cp, &saved_cp, &oval);
581		ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
582					  "garbage after pattern");
583		return (STR(byte_codes));
584	    }
585	    VSTRING_ADDCH(byte_codes, 0);
586	    return (0);
587	}
588
589	/*
590	 * Require "." before the next address field.
591	 */
592	if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') {
593	    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
594				      "need \".\"");
595	    return (STR(byte_codes));
596	}
597    }
598}
599
600#ifdef TEST
601
602 /*
603  * Dummy main program for regression tests.
604  */
605#include <sys/socket.h>
606#include <netinet/in.h>
607#include <arpa/inet.h>
608#include <stdlib.h>
609#include <unistd.h>
610#include <vstream.h>
611#include <vstring_vstream.h>
612#include <stringops.h>
613
614int     main(int argc, char **argv)
615{
616    VSTRING *byte_codes = vstring_alloc(100);
617    VSTRING *line_buf = vstring_alloc(100);
618    char   *bufp;
619    char   *err;
620    char   *user_pattern;
621    char   *user_address;
622    int     echo_input = !isatty(0);
623
624    /*
625     * Iterate over the input stream. The input format is a pattern, followed
626     * by optional addresses to match against.
627     */
628    while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) {
629	bufp = STR(line_buf);
630	if (echo_input) {
631	    vstream_printf("> %s\n", bufp);
632	    vstream_fflush(VSTREAM_OUT);
633	}
634	if (*bufp == '#')
635	    continue;
636	if ((user_pattern = mystrtok(&bufp, " \t")) == 0)
637	    continue;
638
639	/*
640	 * Parse and dump the pattern.
641	 */
642	if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) {
643	    vstream_printf("Error: %s\n", err);
644	} else {
645	    vstream_printf("Code: %s\n",
646			   ip_match_dump(line_buf, STR(byte_codes)));
647	}
648	vstream_fflush(VSTREAM_OUT);
649
650	/*
651	 * Match the optional patterns.
652	 */
653	while ((user_address = mystrtok(&bufp, " \t")) != 0) {
654	    struct in_addr netw_addr;
655
656	    switch (inet_pton(AF_INET, user_address, &netw_addr)) {
657	    case 1:
658		vstream_printf("Match %s: %s\n", user_address,
659			       ip_match_execute(STR(byte_codes),
660						(char *) &netw_addr.s_addr) ?
661			       "yes" : "no");
662		break;
663	    case 0:
664		vstream_printf("bad address syntax: %s\n", user_address);
665		break;
666	    case -1:
667		vstream_printf("%s: %m\n", user_address);
668		break;
669	    }
670	    vstream_fflush(VSTREAM_OUT);
671	}
672    }
673    vstring_free(line_buf);
674    vstring_free(byte_codes);
675    exit(0);
676}
677
678#endif
679