1/*++
2/* NAME
3/*	ip_match 3
4/* SUMMARY
5/*	IP address pattern matching
6/* SYNOPSIS
7/*	#include <ip_match.h>
8/*
9/*	char	*ip_match_parse(byte_codes, pattern)
10/*	VSTRING	*byte_codes;
11/*	char	*pattern;
12/*
13/*	char	*ip_match_save(byte_codes)
14/*	const VSTRING *byte_codes;
15/*
16/*	int	ip_match_execute(byte_codes, addr_bytes)
17/*	cost char *byte_codes;
18/*	const char *addr_bytes;
19/*
20/*	char	*ip_match_dump(printable, byte_codes)
21/*	VSTRING	*printable;
22/*	const char *byte_codes;
23/* DESCRIPTION
24/*	This module supports IP address pattern matching. See below
25/*	for a description of the supported address pattern syntax.
26/*
27/*	This implementation aims to minimize the cost of encoding
28/*	the pattern in internal form, while still providing good
29/*	matching performance in the typical case.   The first byte
30/*	of an encoded pattern specifies the expected address family
31/*	(for example, AF_INET); other details of the encoding are
32/*	private and are subject to change.
33/*
34/*	ip_match_parse() converts the user-specified pattern to
35/*	internal form. The result value is a null pointer in case
36/*	of success, or a pointer into the byte_codes buffer with a
37/*	detailed problem description.
38/*
39/*	ip_match_save() saves the result from ip_match_parse() for
40/*	longer-term usage. The result should be passed to myfree().
41/*
42/*	ip_match_execute() matches a binary network in addr_bytes
43/*	against a byte-code array in byte_codes. It is an error to
44/*	use different address families for the byte_codes and addr_bytes
45/*	arguments (the first byte-code value contains the expected
46/*	address family).  The result is non-zero in case of success.
47/*
48/*	ip_match_dump() produces an ASCII dump of a byte-code array.
49/*	The dump is supposed to be identical to the input pattern
50/*	modulo upper/lower case or leading nulls with IPv6).  This
51/*	function is primarily a debugging aid.
52/*
53/*	Arguments
54/* .IP addr_bytes
55/*	Binary network address in network-byte order.
56/* .IP byte_codes
57/*	Byte-code array produced by ip_match_parse().
58/* .IP pattern
59/*	Human-readable address pattern.
60/* .IP printable
61/*	storage for ASCII dump of a byte-code array.
62/* IPV4 PATTERN SYNTAX
63/* .ad
64/* .fi
65/*	An IPv4 address pattern has four fields separated by ".".
66/*	Each field is either a decimal number, or a sequence inside
67/*	"[]" that contains one or more ";"-separated decimal
68/*	numbers or number..number ranges.
69/*
70/*	Examples of patterns are 1.2.3.4 (matches itself, as one
71/*	would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4,
72/*	1.2.3.6, 1.2.3.7, 1.2.3.8).
73/*
74/*	Thus, any pattern field can be a sequence inside "[]", but
75/*	a "[]" sequence cannot span multiple address fields, and
76/*	a pattern field cannot contain both a number and a "[]"
77/*	sequence at the same time.
78/*
79/*	This means that the pattern 1.2.[3.4] is not valid (the
80/*	sequence [3.4] cannot span two address fields) and the
81/*	pattern 1.2.3.3[6..9] is also not valid (the last field
82/*	cannot be both number 3 and sequence [6..9] at the same
83/*	time).
84/*
85/*	The syntax for IPv4 patterns is as follows:
86/*
87/* .in +5
88/*	v4pattern = v4field "." v4field "." v4field "." v4field
89/* .br
90/*	v4field = v4octet | "[" v4sequence "]"
91/* .br
92/*	v4octet = any decimal number in the range 0 through 255
93/* .br
94/*	v4sequence = v4seq_member | v4sequence ";" v4seq_member
95/* .br
96/*	v4seq_member = v4octet | v4octet ".." v4octet
97/* .in
98/* LICENSE
99/* .ad
100/* .fi
101/*	The Secure Mailer license must be distributed with this
102/*	software.
103/* AUTHOR(S)
104/*	Wietse Venema
105/*	IBM T.J. Watson Research
106/*	P.O. Box 704
107/*	Yorktown Heights, NY 10598, USA
108/*--*/
109
110/* System library. */
111
112#include <sys_defs.h>
113#include <sys/socket.h>
114#include <ctype.h>
115#include <string.h>
116
117/* Utility library. */
118
119#include <msg.h>
120#include <mymalloc.h>
121#include <vstring.h>
122#include <ip_match.h>
123
124 /*
125  * Token values. The in-band values are also used as byte-code values.
126  */
127#define IP_MATCH_CODE_OPEN	'['	/* in-band */
128#define IP_MATCH_CODE_CLOSE	']'	/* in-band */
129#define IP_MATCH_CODE_OVAL	'N'	/* in-band */
130#define IP_MATCH_CODE_RANGE	'R'	/* in-band */
131#define IP_MATCH_CODE_EOF	'\0'	/* in-band */
132#define IP_MATCH_CODE_ERR	256	/* out-of-band */
133
134 /*
135  * SLMs.
136  */
137#define STR	vstring_str
138#define LEN	VSTRING_LEN
139
140/* ip_match_save - make longer-term copy of byte code */
141
142char   *ip_match_save(const VSTRING *byte_codes)
143{
144    char   *dst;
145
146    dst = mymalloc(LEN(byte_codes));
147    return (memcpy(dst, STR(byte_codes), LEN(byte_codes)));
148}
149
150/* ip_match_dump - byte-code pretty printer */
151
152char   *ip_match_dump(VSTRING *printable, const char *byte_codes)
153{
154    const char *myname = "ip_match_dump";
155    const unsigned char *bp;
156    int     octet_count = 0;
157    int     ch;
158
159    /*
160     * Sanity check. Use different dumping loops for AF_INET and AF_INET6.
161     */
162    if (*byte_codes != AF_INET)
163	msg_panic("%s: malformed byte-code header", myname);
164
165    /*
166     * Pretty-print and sanity-check the byte codes. Note: the loops in this
167     * code have no auto-increment at the end of the iteration. Instead, each
168     * byte-code handler bumps the byte-code pointer appropriately.
169     */
170    VSTRING_RESET(printable);
171    bp = (const unsigned char *) byte_codes + 1;
172    for (;;) {
173
174	/*
175	 * Simple numeric field.
176	 */
177	if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
178	    vstring_sprintf_append(printable, "%d", *bp);
179	    bp += 1;
180	}
181
182	/*
183	 * Wild-card numeric field.
184	 */
185	else if (ch == IP_MATCH_CODE_OPEN) {
186	    vstring_sprintf_append(printable, "[");
187	    for (;;) {
188		/* Numeric range. */
189		if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
190		    vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]);
191		    bp += 2;
192		}
193		/* Number. */
194		else if (ch == IP_MATCH_CODE_OVAL) {
195		    vstring_sprintf_append(printable, "%d", *bp);
196		    bp += 1;
197		}
198		/* End-of-wildcard. */
199		else if (ch == IP_MATCH_CODE_CLOSE) {
200		    break;
201		}
202		/* Corruption. */
203		else {
204		    msg_panic("%s: unexpected byte code (decimal %d) "
205			      "after \"%s\"", myname, ch, STR(printable));
206		}
207		/* Output the wild-card field separator and repeat the loop. */
208		if (*bp != IP_MATCH_CODE_CLOSE)
209		    vstring_sprintf_append(printable, ";");
210	    }
211	    vstring_sprintf_append(printable, "]");
212	}
213
214	/*
215	 * Corruption.
216	 */
217	else {
218	    msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
219		      myname, ch, STR(printable));
220	}
221
222	/*
223	 * Require four octets, not one more, not one less.
224	 */
225	if (++octet_count == 4) {
226	    if (*bp != 0)
227		msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
228			  myname, ch, STR(printable));
229	    return (STR(printable));
230	}
231	if (*bp == 0)
232	    msg_panic("%s: truncated byte code after \"%s\"",
233		      myname, STR(printable));
234
235	/*
236	 * Output the address field separator and repeat the loop.
237	 */
238	vstring_sprintf_append(printable, ".");
239    }
240}
241
242/* ip_match_print_code_prefix - printable byte-code prefix */
243
244static char *ip_match_print_code_prefix(const char *byte_codes, size_t len)
245{
246    static VSTRING *printable = 0;
247    const char *fmt;
248    const char *bp;
249
250    /*
251     * This is primarily for emergency debugging so we don't care about
252     * non-reentrancy.
253     */
254    if (printable == 0)
255	printable = vstring_alloc(100);
256    else
257	VSTRING_RESET(printable);
258
259    /*
260     * Use decimal for IPv4 and hexadecimal otherwise, so that address octet
261     * values are easy to recognize.
262     */
263    fmt = (*byte_codes == AF_INET ? "%d " : "%02x ");
264    for (bp = byte_codes; bp < byte_codes + len; bp++)
265	vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp);
266
267    return (STR(printable));
268}
269
270/* ip_match_execute - byte-code matching engine */
271
272int     ip_match_execute(const char *byte_codes, const char *addr_bytes)
273{
274    const char *myname = "ip_match_execute";
275    const unsigned char *bp;
276    const unsigned char *ap;
277    int     octet_count = 0;
278    int     ch;
279    int     matched;
280
281    /*
282     * Sanity check. Use different execute loops for AF_INET and AF_INET6.
283     */
284    if (*byte_codes != AF_INET)
285	msg_panic("%s: malformed byte-code header (decimal %d)",
286		  myname, *(const unsigned char *) byte_codes);
287
288    /*
289     * Match the address bytes against the byte codes. Avoid problems with
290     * (char -> int) sign extension on architectures with signed characters.
291     */
292    bp = (const unsigned char *) byte_codes + 1;
293    ap = (const unsigned char *) addr_bytes;
294
295    for (octet_count = 0; octet_count < 4; octet_count++, ap++) {
296
297	/*
298	 * Simple numeric field.
299	 */
300	if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
301	    if (*ap == *bp)
302		bp += 1;
303	    else
304		return (0);
305	}
306
307	/*
308	 * Wild-card numeric field.
309	 */
310	else if (ch == IP_MATCH_CODE_OPEN) {
311	    matched = 0;
312	    for (;;) {
313		/* Numeric range. */
314		if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
315		    if (!matched)
316			matched = (*ap >= bp[0] && *ap <= bp[1]);
317		    bp += 2;
318		}
319		/* Number. */
320		else if (ch == IP_MATCH_CODE_OVAL) {
321		    if (!matched)
322			matched = (*ap == *bp);
323		    bp += 1;
324		}
325		/* End-of-wildcard. */
326		else if (ch == IP_MATCH_CODE_CLOSE) {
327		    break;
328		}
329		/* Corruption. */
330		else {
331		    size_t  len = (const char *) bp - byte_codes - 1;
332
333		    msg_panic("%s: unexpected byte code (decimal %d) "
334			      "after \"%s\"", myname, ch,
335			      ip_match_print_code_prefix(byte_codes, len));
336		}
337	    }
338	    if (matched == 0)
339		return (0);
340	}
341
342	/*
343	 * Corruption.
344	 */
345	else {
346	    size_t  len = (const char *) bp - byte_codes - 1;
347
348	    msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
349		   myname, ch, ip_match_print_code_prefix(byte_codes, len));
350	}
351    }
352    return (1);
353}
354
355/* ip_match_next_token - carve out the next token from input pattern */
356
357static int ip_match_next_token(char **pstart, char **psaved_start, int *poval)
358{
359    unsigned char *cp;
360    int     oval;			/* octet value */
361    int     type;			/* token value */
362
363    /*
364     * Return a literal, error, or EOF token. Update the read pointer to the
365     * start of the next token or leave it at the string terminator.
366     */
367#define IP_MATCH_RETURN_TOK(next, type) \
368    do { *pstart = (char *) (next); return (type); } while (0)
369
370    /*
371     * Return a token that contains an IPv4 address octet value.
372     */
373#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \
374	*poval = (oval); IP_MATCH_RETURN_TOK((next), type); \
375    } while (0)
376
377    /*
378     * Light-weight tokenizer. Each result is an IPv4 address octet value, a
379     * literal character value, error, or EOF.
380     */
381    *psaved_start = *pstart;
382    cp = (unsigned char *) *pstart;
383    if (ISDIGIT(*cp)) {
384	oval = *cp - '0';
385	type = IP_MATCH_CODE_OVAL;
386	for (cp += 1; ISDIGIT(*cp); cp++) {
387	    oval *= 10;
388	    oval += *cp - '0';
389	    if (oval > 255)
390		type = IP_MATCH_CODE_ERR;
391	}
392	IP_MATCH_RETURN_TOK_VAL(cp, type, oval);
393    } else {
394	IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp);
395    }
396}
397
398/* ipmatch_print_parse_error - formatted parsing error, with context */
399
400static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply,
401						               char *start,
402						               char *here,
403						               char *next,
404						        const char *fmt,...)
405{
406    va_list ap;
407    int     start_width;
408    int     here_width;
409
410    /*
411     * Format the error type.
412     */
413    va_start(ap, fmt);
414    vstring_vsprintf(reply, fmt, ap);
415    va_end(ap);
416
417    /*
418     * Format the error context. The syntax is complex enough that it is
419     * worth the effort to precisely indicate what input is in error.
420     *
421     * XXX Workaround for %.*s to avoid output when a zero width is specified.
422     */
423    if (start != 0) {
424	start_width = here - start;
425	here_width = next - here;
426	vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"",
427			       start_width, start_width == 0 ? "" : start,
428			     here_width, here_width == 0 ? "" : here, next);
429    }
430}
431
432/* ip_match_parse - parse an entire wild-card address pattern */
433
434char   *ip_match_parse(VSTRING *byte_codes, char *pattern)
435{
436    int     octet_count;
437    char   *saved_cp;
438    char   *cp;
439    int     token_type;
440    int     look_ahead;
441    int     oval;
442    int     saved_oval;
443
444    /*
445     * Simplify this if we change to {} for wildcard notation.
446     */
447#define FIND_TERMINATOR(start, cp) do { \
448	int _level = 0; \
449	for (cp = (start) ; *cp; cp++) { \
450	    if (*cp == '[') _level++; \
451	    if (*cp != ']') continue; \
452	    if (--_level == 0) break; \
453	} \
454    } while (0)
455
456    /*
457     * Strip [] around the entire pattern.
458     */
459    if (*pattern == '[') {
460	FIND_TERMINATOR(pattern, cp);
461	if (cp[0] == 0) {
462	    vstring_sprintf(byte_codes, "missing \"]\" character");
463	    return (STR(byte_codes));
464	}
465	if (cp[1] == 0) {
466	    *cp = 0;
467	    pattern += 1;
468	}
469    }
470
471    /*
472     * Sanity check. In this case we can't show any error context.
473     */
474    if (*pattern == 0) {
475	vstring_sprintf(byte_codes, "empty address pattern");
476	return (STR(byte_codes));
477    }
478
479    /*
480     * Simple parser with on-the-fly encoding. For now, IPv4 support only.
481     * Use different parser loops for IPv4 and IPv6.
482     */
483    VSTRING_RESET(byte_codes);
484    VSTRING_ADDCH(byte_codes, AF_INET);
485    octet_count = 0;
486    cp = pattern;
487
488    /*
489     * Require four address fields separated by ".", each field containing a
490     * numeric octet value or a sequence inside []. The loop head has no test
491     * and does not step the loop variable. The tokenizer advances the loop
492     * variable, and the loop termination logic is inside the loop.
493     */
494    for (;;) {
495	switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) {
496
497	    /*
498	     * Numeric address field.
499	     */
500	case IP_MATCH_CODE_OVAL:
501	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
502	    VSTRING_ADDCH(byte_codes, oval);
503	    break;
504
505	    /*
506	     * Wild-card address field.
507	     */
508	case IP_MATCH_CODE_OPEN:
509	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN);
510	    /* Require ";"-separated numbers or numeric ranges. */
511	    for (;;) {
512		token_type = ip_match_next_token(&cp, &saved_cp, &oval);
513		if (token_type == IP_MATCH_CODE_OVAL) {
514		    saved_oval = oval;
515		    look_ahead = ip_match_next_token(&cp, &saved_cp, &oval);
516		    /* Numeric range. */
517		    if (look_ahead == '.') {
518			/* Brute-force parsing. */
519			if (ip_match_next_token(&cp, &saved_cp, &oval) == '.'
520			    && ip_match_next_token(&cp, &saved_cp, &oval)
521			    == IP_MATCH_CODE_OVAL
522			    && saved_oval <= oval) {
523			    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE);
524			    VSTRING_ADDCH(byte_codes, saved_oval);
525			    VSTRING_ADDCH(byte_codes, oval);
526			    look_ahead =
527				ip_match_next_token(&cp, &saved_cp, &oval);
528			} else {
529			    ipmatch_print_parse_error(byte_codes, pattern,
530						      saved_cp, cp,
531						      "numeric range error");
532			    return (STR(byte_codes));
533			}
534		    }
535		    /* Single number. */
536		    else {
537			VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
538			VSTRING_ADDCH(byte_codes, saved_oval);
539		    }
540		    /* Require ";" or end-of-wildcard. */
541		    token_type = look_ahead;
542		    if (token_type == ';') {
543			continue;
544		    } else if (token_type == IP_MATCH_CODE_CLOSE) {
545			break;
546		    } else {
547			ipmatch_print_parse_error(byte_codes, pattern,
548						  saved_cp, cp,
549						  "need \";\" or \"%c\"",
550						  IP_MATCH_CODE_CLOSE);
551			return (STR(byte_codes));
552		    }
553		} else {
554		    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
555					      "need decimal number 0..255");
556		    return (STR(byte_codes));
557		}
558	    }
559	    VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE);
560	    break;
561
562	    /*
563	     * Invalid field.
564	     */
565	default:
566	    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
567				      "need decimal number 0..255 or \"%c\"",
568				      IP_MATCH_CODE_OPEN);
569	    return (STR(byte_codes));
570	}
571	octet_count += 1;
572
573	/*
574	 * Require four address fields. Not one more, not one less.
575	 */
576	if (octet_count == 4) {
577	    if (*cp != 0) {
578		(void) ip_match_next_token(&cp, &saved_cp, &oval);
579		ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
580					  "garbage after pattern");
581		return (STR(byte_codes));
582	    }
583	    VSTRING_ADDCH(byte_codes, 0);
584	    return (0);
585	}
586
587	/*
588	 * Require "." before the next address field.
589	 */
590	if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') {
591	    ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
592				      "need \".\"");
593	    return (STR(byte_codes));
594	}
595    }
596}
597
598#ifdef TEST
599
600 /*
601  * Dummy main program for regression tests.
602  */
603#include <sys/socket.h>
604#include <netinet/in.h>
605#include <arpa/inet.h>
606#include <stdlib.h>
607#include <unistd.h>
608#include <vstream.h>
609#include <vstring_vstream.h>
610#include <stringops.h>
611
612int     main(int argc, char **argv)
613{
614    VSTRING *byte_codes = vstring_alloc(100);
615    VSTRING *line_buf = vstring_alloc(100);
616    char   *bufp;
617    char   *err;
618    char   *user_pattern;
619    char   *user_address;
620    int     echo_input = !isatty(0);
621
622    /*
623     * Iterate over the input stream. The input format is a pattern, followed
624     * by optional addresses to match against.
625     */
626    while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) {
627	bufp = STR(line_buf);
628	if (echo_input) {
629	    vstream_printf("> %s\n", bufp);
630	    vstream_fflush(VSTREAM_OUT);
631	}
632	if (*bufp == '#')
633	    continue;
634	if ((user_pattern = mystrtok(&bufp, " \t")) == 0)
635	    continue;
636
637	/*
638	 * Parse and dump the pattern.
639	 */
640	if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) {
641	    vstream_printf("Error: %s\n", err);
642	} else {
643	    vstream_printf("Code: %s\n",
644			   ip_match_dump(line_buf, STR(byte_codes)));
645	}
646	vstream_fflush(VSTREAM_OUT);
647
648	/*
649	 * Match the optional patterns.
650	 */
651	while ((user_address = mystrtok(&bufp, " \t")) != 0) {
652	    struct in_addr netw_addr;
653
654	    switch (inet_pton(AF_INET, user_address, &netw_addr)) {
655	    case 1:
656		vstream_printf("Match %s: %s\n", user_address,
657			       ip_match_execute(STR(byte_codes),
658						(char *) &netw_addr.s_addr) ?
659			       "yes" : "no");
660		break;
661	    case 0:
662		vstream_printf("bad address syntax: %s\n", user_address);
663		break;
664	    case -1:
665		vstream_printf("%s: %m\n", user_address);
666		break;
667	    }
668	    vstream_fflush(VSTREAM_OUT);
669	}
670    }
671    vstring_free(line_buf);
672    vstring_free(byte_codes);
673    exit(0);
674}
675
676#endif
677