1/* $NetBSD$ */ 2 3/*++ 4/* NAME 5/* ip_match 3 6/* SUMMARY 7/* IP address pattern matching 8/* SYNOPSIS 9/* #include <ip_match.h> 10/* 11/* char *ip_match_parse(byte_codes, pattern) 12/* VSTRING *byte_codes; 13/* char *pattern; 14/* 15/* char *ip_match_save(byte_codes) 16/* const VSTRING *byte_codes; 17/* 18/* int ip_match_execute(byte_codes, addr_bytes) 19/* cost char *byte_codes; 20/* const char *addr_bytes; 21/* 22/* char *ip_match_dump(printable, byte_codes) 23/* VSTRING *printable; 24/* const char *byte_codes; 25/* DESCRIPTION 26/* This module supports IP address pattern matching. See below 27/* for a description of the supported address pattern syntax. 28/* 29/* This implementation aims to minimize the cost of encoding 30/* the pattern in internal form, while still providing good 31/* matching performance in the typical case. The first byte 32/* of an encoded pattern specifies the expected address family 33/* (for example, AF_INET); other details of the encoding are 34/* private and are subject to change. 35/* 36/* ip_match_parse() converts the user-specified pattern to 37/* internal form. The result value is a null pointer in case 38/* of success, or a pointer into the byte_codes buffer with a 39/* detailed problem description. 40/* 41/* ip_match_save() saves the result from ip_match_parse() for 42/* longer-term usage. The result should be passed to myfree(). 43/* 44/* ip_match_execute() matches a binary network in addr_bytes 45/* against a byte-code array in byte_codes. It is an error to 46/* use different address families for the byte_codes and addr_bytes 47/* arguments (the first byte-code value contains the expected 48/* address family). The result is non-zero in case of success. 49/* 50/* ip_match_dump() produces an ASCII dump of a byte-code array. 51/* The dump is supposed to be identical to the input pattern 52/* modulo upper/lower case or leading nulls with IPv6). This 53/* function is primarily a debugging aid. 54/* 55/* Arguments 56/* .IP addr_bytes 57/* Binary network address in network-byte order. 58/* .IP byte_codes 59/* Byte-code array produced by ip_match_parse(). 60/* .IP pattern 61/* Human-readable address pattern. 62/* .IP printable 63/* storage for ASCII dump of a byte-code array. 64/* IPV4 PATTERN SYNTAX 65/* .ad 66/* .fi 67/* An IPv4 address pattern has four fields separated by ".". 68/* Each field is either a decimal number, or a sequence inside 69/* "[]" that contains one or more ";"-separated decimal 70/* numbers or number..number ranges. 71/* 72/* Examples of patterns are 1.2.3.4 (matches itself, as one 73/* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4, 74/* 1.2.3.6, 1.2.3.7, 1.2.3.8). 75/* 76/* Thus, any pattern field can be a sequence inside "[]", but 77/* a "[]" sequence cannot span multiple address fields, and 78/* a pattern field cannot contain both a number and a "[]" 79/* sequence at the same time. 80/* 81/* This means that the pattern 1.2.[3.4] is not valid (the 82/* sequence [3.4] cannot span two address fields) and the 83/* pattern 1.2.3.3[6..9] is also not valid (the last field 84/* cannot be both number 3 and sequence [6..9] at the same 85/* time). 86/* 87/* The syntax for IPv4 patterns is as follows: 88/* 89/* .in +5 90/* v4pattern = v4field "." v4field "." v4field "." v4field 91/* .br 92/* v4field = v4octet | "[" v4sequence "]" 93/* .br 94/* v4octet = any decimal number in the range 0 through 255 95/* .br 96/* v4sequence = v4seq_member | v4sequence ";" v4seq_member 97/* .br 98/* v4seq_member = v4octet | v4octet ".." v4octet 99/* .in 100/* LICENSE 101/* .ad 102/* .fi 103/* The Secure Mailer license must be distributed with this 104/* software. 105/* AUTHOR(S) 106/* Wietse Venema 107/* IBM T.J. Watson Research 108/* P.O. Box 704 109/* Yorktown Heights, NY 10598, USA 110/*--*/ 111 112/* System library. */ 113 114#include <sys_defs.h> 115#include <sys/socket.h> 116#include <ctype.h> 117#include <string.h> 118 119/* Utility library. */ 120 121#include <msg.h> 122#include <mymalloc.h> 123#include <vstring.h> 124#include <ip_match.h> 125 126 /* 127 * Token values. The in-band values are also used as byte-code values. 128 */ 129#define IP_MATCH_CODE_OPEN '[' /* in-band */ 130#define IP_MATCH_CODE_CLOSE ']' /* in-band */ 131#define IP_MATCH_CODE_OVAL 'N' /* in-band */ 132#define IP_MATCH_CODE_RANGE 'R' /* in-band */ 133#define IP_MATCH_CODE_EOF '\0' /* in-band */ 134#define IP_MATCH_CODE_ERR 256 /* out-of-band */ 135 136 /* 137 * SLMs. 138 */ 139#define STR vstring_str 140#define LEN VSTRING_LEN 141 142/* ip_match_save - make longer-term copy of byte code */ 143 144char *ip_match_save(const VSTRING *byte_codes) 145{ 146 char *dst; 147 148 dst = mymalloc(LEN(byte_codes)); 149 return (memcpy(dst, STR(byte_codes), LEN(byte_codes))); 150} 151 152/* ip_match_dump - byte-code pretty printer */ 153 154char *ip_match_dump(VSTRING *printable, const char *byte_codes) 155{ 156 const char *myname = "ip_match_dump"; 157 const unsigned char *bp; 158 int octet_count = 0; 159 int ch; 160 161 /* 162 * Sanity check. Use different dumping loops for AF_INET and AF_INET6. 163 */ 164 if (*byte_codes != AF_INET) 165 msg_panic("%s: malformed byte-code header", myname); 166 167 /* 168 * Pretty-print and sanity-check the byte codes. Note: the loops in this 169 * code have no auto-increment at the end of the iteration. Instead, each 170 * byte-code handler bumps the byte-code pointer appropriately. 171 */ 172 VSTRING_RESET(printable); 173 bp = (const unsigned char *) byte_codes + 1; 174 for (;;) { 175 176 /* 177 * Simple numeric field. 178 */ 179 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 180 vstring_sprintf_append(printable, "%d", *bp); 181 bp += 1; 182 } 183 184 /* 185 * Wild-card numeric field. 186 */ 187 else if (ch == IP_MATCH_CODE_OPEN) { 188 vstring_sprintf_append(printable, "["); 189 for (;;) { 190 /* Numeric range. */ 191 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 192 vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]); 193 bp += 2; 194 } 195 /* Number. */ 196 else if (ch == IP_MATCH_CODE_OVAL) { 197 vstring_sprintf_append(printable, "%d", *bp); 198 bp += 1; 199 } 200 /* End-of-wildcard. */ 201 else if (ch == IP_MATCH_CODE_CLOSE) { 202 break; 203 } 204 /* Corruption. */ 205 else { 206 msg_panic("%s: unexpected byte code (decimal %d) " 207 "after \"%s\"", myname, ch, STR(printable)); 208 } 209 /* Output the wild-card field separator and repeat the loop. */ 210 if (*bp != IP_MATCH_CODE_CLOSE) 211 vstring_sprintf_append(printable, ";"); 212 } 213 vstring_sprintf_append(printable, "]"); 214 } 215 216 /* 217 * Corruption. 218 */ 219 else { 220 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 221 myname, ch, STR(printable)); 222 } 223 224 /* 225 * Require four octets, not one more, not one less. 226 */ 227 if (++octet_count == 4) { 228 if (*bp != 0) 229 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 230 myname, ch, STR(printable)); 231 return (STR(printable)); 232 } 233 if (*bp == 0) 234 msg_panic("%s: truncated byte code after \"%s\"", 235 myname, STR(printable)); 236 237 /* 238 * Output the address field separator and repeat the loop. 239 */ 240 vstring_sprintf_append(printable, "."); 241 } 242} 243 244/* ip_match_print_code_prefix - printable byte-code prefix */ 245 246static char *ip_match_print_code_prefix(const char *byte_codes, size_t len) 247{ 248 static VSTRING *printable = 0; 249 const char *fmt; 250 const char *bp; 251 252 /* 253 * This is primarily for emergency debugging so we don't care about 254 * non-reentrancy. 255 */ 256 if (printable == 0) 257 printable = vstring_alloc(100); 258 else 259 VSTRING_RESET(printable); 260 261 /* 262 * Use decimal for IPv4 and hexadecimal otherwise, so that address octet 263 * values are easy to recognize. 264 */ 265 fmt = (*byte_codes == AF_INET ? "%d " : "%02x "); 266 for (bp = byte_codes; bp < byte_codes + len; bp++) 267 vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp); 268 269 return (STR(printable)); 270} 271 272/* ip_match_execute - byte-code matching engine */ 273 274int ip_match_execute(const char *byte_codes, const char *addr_bytes) 275{ 276 const char *myname = "ip_match_execute"; 277 const unsigned char *bp; 278 const unsigned char *ap; 279 int octet_count = 0; 280 int ch; 281 int matched; 282 283 /* 284 * Sanity check. Use different execute loops for AF_INET and AF_INET6. 285 */ 286 if (*byte_codes != AF_INET) 287 msg_panic("%s: malformed byte-code header (decimal %d)", 288 myname, *(const unsigned char *) byte_codes); 289 290 /* 291 * Match the address bytes against the byte codes. Avoid problems with 292 * (char -> int) sign extension on architectures with signed characters. 293 */ 294 bp = (const unsigned char *) byte_codes + 1; 295 ap = (const unsigned char *) addr_bytes; 296 297 for (octet_count = 0; octet_count < 4; octet_count++, ap++) { 298 299 /* 300 * Simple numeric field. 301 */ 302 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 303 if (*ap == *bp) 304 bp += 1; 305 else 306 return (0); 307 } 308 309 /* 310 * Wild-card numeric field. 311 */ 312 else if (ch == IP_MATCH_CODE_OPEN) { 313 matched = 0; 314 for (;;) { 315 /* Numeric range. */ 316 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 317 if (!matched) 318 matched = (*ap >= bp[0] && *ap <= bp[1]); 319 bp += 2; 320 } 321 /* Number. */ 322 else if (ch == IP_MATCH_CODE_OVAL) { 323 if (!matched) 324 matched = (*ap == *bp); 325 bp += 1; 326 } 327 /* End-of-wildcard. */ 328 else if (ch == IP_MATCH_CODE_CLOSE) { 329 break; 330 } 331 /* Corruption. */ 332 else { 333 size_t len = (const char *) bp - byte_codes - 1; 334 335 msg_panic("%s: unexpected byte code (decimal %d) " 336 "after \"%s\"", myname, ch, 337 ip_match_print_code_prefix(byte_codes, len)); 338 } 339 } 340 if (matched == 0) 341 return (0); 342 } 343 344 /* 345 * Corruption. 346 */ 347 else { 348 size_t len = (const char *) bp - byte_codes - 1; 349 350 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 351 myname, ch, ip_match_print_code_prefix(byte_codes, len)); 352 } 353 } 354 return (1); 355} 356 357/* ip_match_next_token - carve out the next token from input pattern */ 358 359static int ip_match_next_token(char **pstart, char **psaved_start, int *poval) 360{ 361 unsigned char *cp; 362 int oval; /* octet value */ 363 int type; /* token value */ 364 365 /* 366 * Return a literal, error, or EOF token. Update the read pointer to the 367 * start of the next token or leave it at the string terminator. 368 */ 369#define IP_MATCH_RETURN_TOK(next, type) \ 370 do { *pstart = (char *) (next); return (type); } while (0) 371 372 /* 373 * Return a token that contains an IPv4 address octet value. 374 */ 375#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \ 376 *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \ 377 } while (0) 378 379 /* 380 * Light-weight tokenizer. Each result is an IPv4 address octet value, a 381 * literal character value, error, or EOF. 382 */ 383 *psaved_start = *pstart; 384 cp = (unsigned char *) *pstart; 385 if (ISDIGIT(*cp)) { 386 oval = *cp - '0'; 387 type = IP_MATCH_CODE_OVAL; 388 for (cp += 1; ISDIGIT(*cp); cp++) { 389 oval *= 10; 390 oval += *cp - '0'; 391 if (oval > 255) 392 type = IP_MATCH_CODE_ERR; 393 } 394 IP_MATCH_RETURN_TOK_VAL(cp, type, oval); 395 } else { 396 IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp); 397 } 398} 399 400/* ipmatch_print_parse_error - formatted parsing error, with context */ 401 402static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply, 403 char *start, 404 char *here, 405 char *next, 406 const char *fmt,...) 407{ 408 va_list ap; 409 int start_width; 410 int here_width; 411 412 /* 413 * Format the error type. 414 */ 415 va_start(ap, fmt); 416 vstring_vsprintf(reply, fmt, ap); 417 va_end(ap); 418 419 /* 420 * Format the error context. The syntax is complex enough that it is 421 * worth the effort to precisely indicate what input is in error. 422 * 423 * XXX Workaround for %.*s to avoid output when a zero width is specified. 424 */ 425 if (start != 0) { 426 start_width = here - start; 427 here_width = next - here; 428 vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"", 429 start_width, start_width == 0 ? "" : start, 430 here_width, here_width == 0 ? "" : here, next); 431 } 432} 433 434/* ip_match_parse - parse an entire wild-card address pattern */ 435 436char *ip_match_parse(VSTRING *byte_codes, char *pattern) 437{ 438 int octet_count; 439 char *saved_cp; 440 char *cp; 441 int token_type; 442 int look_ahead; 443 int oval; 444 int saved_oval; 445 446 /* 447 * Simplify this if we change to {} for wildcard notation. 448 */ 449#define FIND_TERMINATOR(start, cp) do { \ 450 int _level = 0; \ 451 for (cp = (start) ; *cp; cp++) { \ 452 if (*cp == '[') _level++; \ 453 if (*cp != ']') continue; \ 454 if (--_level == 0) break; \ 455 } \ 456 } while (0) 457 458 /* 459 * Strip [] around the entire pattern. 460 */ 461 if (*pattern == '[') { 462 FIND_TERMINATOR(pattern, cp); 463 if (cp[0] == 0) { 464 vstring_sprintf(byte_codes, "missing \"]\" character"); 465 return (STR(byte_codes)); 466 } 467 if (cp[1] == 0) { 468 *cp = 0; 469 pattern += 1; 470 } 471 } 472 473 /* 474 * Sanity check. In this case we can't show any error context. 475 */ 476 if (*pattern == 0) { 477 vstring_sprintf(byte_codes, "empty address pattern"); 478 return (STR(byte_codes)); 479 } 480 481 /* 482 * Simple parser with on-the-fly encoding. For now, IPv4 support only. 483 * Use different parser loops for IPv4 and IPv6. 484 */ 485 VSTRING_RESET(byte_codes); 486 VSTRING_ADDCH(byte_codes, AF_INET); 487 octet_count = 0; 488 cp = pattern; 489 490 /* 491 * Require four address fields separated by ".", each field containing a 492 * numeric octet value or a sequence inside []. The loop head has no test 493 * and does not step the loop variable. The tokenizer advances the loop 494 * variable, and the loop termination logic is inside the loop. 495 */ 496 for (;;) { 497 switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) { 498 499 /* 500 * Numeric address field. 501 */ 502 case IP_MATCH_CODE_OVAL: 503 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 504 VSTRING_ADDCH(byte_codes, oval); 505 break; 506 507 /* 508 * Wild-card address field. 509 */ 510 case IP_MATCH_CODE_OPEN: 511 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN); 512 /* Require ";"-separated numbers or numeric ranges. */ 513 for (;;) { 514 token_type = ip_match_next_token(&cp, &saved_cp, &oval); 515 if (token_type == IP_MATCH_CODE_OVAL) { 516 saved_oval = oval; 517 look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); 518 /* Numeric range. */ 519 if (look_ahead == '.') { 520 /* Brute-force parsing. */ 521 if (ip_match_next_token(&cp, &saved_cp, &oval) == '.' 522 && ip_match_next_token(&cp, &saved_cp, &oval) 523 == IP_MATCH_CODE_OVAL 524 && saved_oval <= oval) { 525 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE); 526 VSTRING_ADDCH(byte_codes, saved_oval); 527 VSTRING_ADDCH(byte_codes, oval); 528 look_ahead = 529 ip_match_next_token(&cp, &saved_cp, &oval); 530 } else { 531 ipmatch_print_parse_error(byte_codes, pattern, 532 saved_cp, cp, 533 "numeric range error"); 534 return (STR(byte_codes)); 535 } 536 } 537 /* Single number. */ 538 else { 539 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 540 VSTRING_ADDCH(byte_codes, saved_oval); 541 } 542 /* Require ";" or end-of-wildcard. */ 543 token_type = look_ahead; 544 if (token_type == ';') { 545 continue; 546 } else if (token_type == IP_MATCH_CODE_CLOSE) { 547 break; 548 } else { 549 ipmatch_print_parse_error(byte_codes, pattern, 550 saved_cp, cp, 551 "need \";\" or \"%c\"", 552 IP_MATCH_CODE_CLOSE); 553 return (STR(byte_codes)); 554 } 555 } else { 556 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 557 "need decimal number 0..255"); 558 return (STR(byte_codes)); 559 } 560 } 561 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE); 562 break; 563 564 /* 565 * Invalid field. 566 */ 567 default: 568 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 569 "need decimal number 0..255 or \"%c\"", 570 IP_MATCH_CODE_OPEN); 571 return (STR(byte_codes)); 572 } 573 octet_count += 1; 574 575 /* 576 * Require four address fields. Not one more, not one less. 577 */ 578 if (octet_count == 4) { 579 if (*cp != 0) { 580 (void) ip_match_next_token(&cp, &saved_cp, &oval); 581 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 582 "garbage after pattern"); 583 return (STR(byte_codes)); 584 } 585 VSTRING_ADDCH(byte_codes, 0); 586 return (0); 587 } 588 589 /* 590 * Require "." before the next address field. 591 */ 592 if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') { 593 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 594 "need \".\""); 595 return (STR(byte_codes)); 596 } 597 } 598} 599 600#ifdef TEST 601 602 /* 603 * Dummy main program for regression tests. 604 */ 605#include <sys/socket.h> 606#include <netinet/in.h> 607#include <arpa/inet.h> 608#include <stdlib.h> 609#include <unistd.h> 610#include <vstream.h> 611#include <vstring_vstream.h> 612#include <stringops.h> 613 614int main(int argc, char **argv) 615{ 616 VSTRING *byte_codes = vstring_alloc(100); 617 VSTRING *line_buf = vstring_alloc(100); 618 char *bufp; 619 char *err; 620 char *user_pattern; 621 char *user_address; 622 int echo_input = !isatty(0); 623 624 /* 625 * Iterate over the input stream. The input format is a pattern, followed 626 * by optional addresses to match against. 627 */ 628 while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) { 629 bufp = STR(line_buf); 630 if (echo_input) { 631 vstream_printf("> %s\n", bufp); 632 vstream_fflush(VSTREAM_OUT); 633 } 634 if (*bufp == '#') 635 continue; 636 if ((user_pattern = mystrtok(&bufp, " \t")) == 0) 637 continue; 638 639 /* 640 * Parse and dump the pattern. 641 */ 642 if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) { 643 vstream_printf("Error: %s\n", err); 644 } else { 645 vstream_printf("Code: %s\n", 646 ip_match_dump(line_buf, STR(byte_codes))); 647 } 648 vstream_fflush(VSTREAM_OUT); 649 650 /* 651 * Match the optional patterns. 652 */ 653 while ((user_address = mystrtok(&bufp, " \t")) != 0) { 654 struct in_addr netw_addr; 655 656 switch (inet_pton(AF_INET, user_address, &netw_addr)) { 657 case 1: 658 vstream_printf("Match %s: %s\n", user_address, 659 ip_match_execute(STR(byte_codes), 660 (char *) &netw_addr.s_addr) ? 661 "yes" : "no"); 662 break; 663 case 0: 664 vstream_printf("bad address syntax: %s\n", user_address); 665 break; 666 case -1: 667 vstream_printf("%s: %m\n", user_address); 668 break; 669 } 670 vstream_fflush(VSTREAM_OUT); 671 } 672 } 673 vstring_free(line_buf); 674 vstring_free(byte_codes); 675 exit(0); 676} 677 678#endif 679