1/*++ 2/* NAME 3/* ip_match 3 4/* SUMMARY 5/* IP address pattern matching 6/* SYNOPSIS 7/* #include <ip_match.h> 8/* 9/* char *ip_match_parse(byte_codes, pattern) 10/* VSTRING *byte_codes; 11/* char *pattern; 12/* 13/* char *ip_match_save(byte_codes) 14/* const VSTRING *byte_codes; 15/* 16/* int ip_match_execute(byte_codes, addr_bytes) 17/* cost char *byte_codes; 18/* const char *addr_bytes; 19/* 20/* char *ip_match_dump(printable, byte_codes) 21/* VSTRING *printable; 22/* const char *byte_codes; 23/* DESCRIPTION 24/* This module supports IP address pattern matching. See below 25/* for a description of the supported address pattern syntax. 26/* 27/* This implementation aims to minimize the cost of encoding 28/* the pattern in internal form, while still providing good 29/* matching performance in the typical case. The first byte 30/* of an encoded pattern specifies the expected address family 31/* (for example, AF_INET); other details of the encoding are 32/* private and are subject to change. 33/* 34/* ip_match_parse() converts the user-specified pattern to 35/* internal form. The result value is a null pointer in case 36/* of success, or a pointer into the byte_codes buffer with a 37/* detailed problem description. 38/* 39/* ip_match_save() saves the result from ip_match_parse() for 40/* longer-term usage. The result should be passed to myfree(). 41/* 42/* ip_match_execute() matches a binary network in addr_bytes 43/* against a byte-code array in byte_codes. It is an error to 44/* use different address families for the byte_codes and addr_bytes 45/* arguments (the first byte-code value contains the expected 46/* address family). The result is non-zero in case of success. 47/* 48/* ip_match_dump() produces an ASCII dump of a byte-code array. 49/* The dump is supposed to be identical to the input pattern 50/* modulo upper/lower case or leading nulls with IPv6). This 51/* function is primarily a debugging aid. 52/* 53/* Arguments 54/* .IP addr_bytes 55/* Binary network address in network-byte order. 56/* .IP byte_codes 57/* Byte-code array produced by ip_match_parse(). 58/* .IP pattern 59/* Human-readable address pattern. 60/* .IP printable 61/* storage for ASCII dump of a byte-code array. 62/* IPV4 PATTERN SYNTAX 63/* .ad 64/* .fi 65/* An IPv4 address pattern has four fields separated by ".". 66/* Each field is either a decimal number, or a sequence inside 67/* "[]" that contains one or more ";"-separated decimal 68/* numbers or number..number ranges. 69/* 70/* Examples of patterns are 1.2.3.4 (matches itself, as one 71/* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4, 72/* 1.2.3.6, 1.2.3.7, 1.2.3.8). 73/* 74/* Thus, any pattern field can be a sequence inside "[]", but 75/* a "[]" sequence cannot span multiple address fields, and 76/* a pattern field cannot contain both a number and a "[]" 77/* sequence at the same time. 78/* 79/* This means that the pattern 1.2.[3.4] is not valid (the 80/* sequence [3.4] cannot span two address fields) and the 81/* pattern 1.2.3.3[6..9] is also not valid (the last field 82/* cannot be both number 3 and sequence [6..9] at the same 83/* time). 84/* 85/* The syntax for IPv4 patterns is as follows: 86/* 87/* .in +5 88/* v4pattern = v4field "." v4field "." v4field "." v4field 89/* .br 90/* v4field = v4octet | "[" v4sequence "]" 91/* .br 92/* v4octet = any decimal number in the range 0 through 255 93/* .br 94/* v4sequence = v4seq_member | v4sequence ";" v4seq_member 95/* .br 96/* v4seq_member = v4octet | v4octet ".." v4octet 97/* .in 98/* LICENSE 99/* .ad 100/* .fi 101/* The Secure Mailer license must be distributed with this 102/* software. 103/* AUTHOR(S) 104/* Wietse Venema 105/* IBM T.J. Watson Research 106/* P.O. Box 704 107/* Yorktown Heights, NY 10598, USA 108/*--*/ 109 110/* System library. */ 111 112#include <sys_defs.h> 113#include <sys/socket.h> 114#include <ctype.h> 115#include <string.h> 116 117/* Utility library. */ 118 119#include <msg.h> 120#include <mymalloc.h> 121#include <vstring.h> 122#include <ip_match.h> 123 124 /* 125 * Token values. The in-band values are also used as byte-code values. 126 */ 127#define IP_MATCH_CODE_OPEN '[' /* in-band */ 128#define IP_MATCH_CODE_CLOSE ']' /* in-band */ 129#define IP_MATCH_CODE_OVAL 'N' /* in-band */ 130#define IP_MATCH_CODE_RANGE 'R' /* in-band */ 131#define IP_MATCH_CODE_EOF '\0' /* in-band */ 132#define IP_MATCH_CODE_ERR 256 /* out-of-band */ 133 134 /* 135 * SLMs. 136 */ 137#define STR vstring_str 138#define LEN VSTRING_LEN 139 140/* ip_match_save - make longer-term copy of byte code */ 141 142char *ip_match_save(const VSTRING *byte_codes) 143{ 144 char *dst; 145 146 dst = mymalloc(LEN(byte_codes)); 147 return (memcpy(dst, STR(byte_codes), LEN(byte_codes))); 148} 149 150/* ip_match_dump - byte-code pretty printer */ 151 152char *ip_match_dump(VSTRING *printable, const char *byte_codes) 153{ 154 const char *myname = "ip_match_dump"; 155 const unsigned char *bp; 156 int octet_count = 0; 157 int ch; 158 159 /* 160 * Sanity check. Use different dumping loops for AF_INET and AF_INET6. 161 */ 162 if (*byte_codes != AF_INET) 163 msg_panic("%s: malformed byte-code header", myname); 164 165 /* 166 * Pretty-print and sanity-check the byte codes. Note: the loops in this 167 * code have no auto-increment at the end of the iteration. Instead, each 168 * byte-code handler bumps the byte-code pointer appropriately. 169 */ 170 VSTRING_RESET(printable); 171 bp = (const unsigned char *) byte_codes + 1; 172 for (;;) { 173 174 /* 175 * Simple numeric field. 176 */ 177 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 178 vstring_sprintf_append(printable, "%d", *bp); 179 bp += 1; 180 } 181 182 /* 183 * Wild-card numeric field. 184 */ 185 else if (ch == IP_MATCH_CODE_OPEN) { 186 vstring_sprintf_append(printable, "["); 187 for (;;) { 188 /* Numeric range. */ 189 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 190 vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]); 191 bp += 2; 192 } 193 /* Number. */ 194 else if (ch == IP_MATCH_CODE_OVAL) { 195 vstring_sprintf_append(printable, "%d", *bp); 196 bp += 1; 197 } 198 /* End-of-wildcard. */ 199 else if (ch == IP_MATCH_CODE_CLOSE) { 200 break; 201 } 202 /* Corruption. */ 203 else { 204 msg_panic("%s: unexpected byte code (decimal %d) " 205 "after \"%s\"", myname, ch, STR(printable)); 206 } 207 /* Output the wild-card field separator and repeat the loop. */ 208 if (*bp != IP_MATCH_CODE_CLOSE) 209 vstring_sprintf_append(printable, ";"); 210 } 211 vstring_sprintf_append(printable, "]"); 212 } 213 214 /* 215 * Corruption. 216 */ 217 else { 218 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 219 myname, ch, STR(printable)); 220 } 221 222 /* 223 * Require four octets, not one more, not one less. 224 */ 225 if (++octet_count == 4) { 226 if (*bp != 0) 227 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 228 myname, ch, STR(printable)); 229 return (STR(printable)); 230 } 231 if (*bp == 0) 232 msg_panic("%s: truncated byte code after \"%s\"", 233 myname, STR(printable)); 234 235 /* 236 * Output the address field separator and repeat the loop. 237 */ 238 vstring_sprintf_append(printable, "."); 239 } 240} 241 242/* ip_match_print_code_prefix - printable byte-code prefix */ 243 244static char *ip_match_print_code_prefix(const char *byte_codes, size_t len) 245{ 246 static VSTRING *printable = 0; 247 const char *fmt; 248 const char *bp; 249 250 /* 251 * This is primarily for emergency debugging so we don't care about 252 * non-reentrancy. 253 */ 254 if (printable == 0) 255 printable = vstring_alloc(100); 256 else 257 VSTRING_RESET(printable); 258 259 /* 260 * Use decimal for IPv4 and hexadecimal otherwise, so that address octet 261 * values are easy to recognize. 262 */ 263 fmt = (*byte_codes == AF_INET ? "%d " : "%02x "); 264 for (bp = byte_codes; bp < byte_codes + len; bp++) 265 vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp); 266 267 return (STR(printable)); 268} 269 270/* ip_match_execute - byte-code matching engine */ 271 272int ip_match_execute(const char *byte_codes, const char *addr_bytes) 273{ 274 const char *myname = "ip_match_execute"; 275 const unsigned char *bp; 276 const unsigned char *ap; 277 int octet_count = 0; 278 int ch; 279 int matched; 280 281 /* 282 * Sanity check. Use different execute loops for AF_INET and AF_INET6. 283 */ 284 if (*byte_codes != AF_INET) 285 msg_panic("%s: malformed byte-code header (decimal %d)", 286 myname, *(const unsigned char *) byte_codes); 287 288 /* 289 * Match the address bytes against the byte codes. Avoid problems with 290 * (char -> int) sign extension on architectures with signed characters. 291 */ 292 bp = (const unsigned char *) byte_codes + 1; 293 ap = (const unsigned char *) addr_bytes; 294 295 for (octet_count = 0; octet_count < 4; octet_count++, ap++) { 296 297 /* 298 * Simple numeric field. 299 */ 300 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 301 if (*ap == *bp) 302 bp += 1; 303 else 304 return (0); 305 } 306 307 /* 308 * Wild-card numeric field. 309 */ 310 else if (ch == IP_MATCH_CODE_OPEN) { 311 matched = 0; 312 for (;;) { 313 /* Numeric range. */ 314 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 315 if (!matched) 316 matched = (*ap >= bp[0] && *ap <= bp[1]); 317 bp += 2; 318 } 319 /* Number. */ 320 else if (ch == IP_MATCH_CODE_OVAL) { 321 if (!matched) 322 matched = (*ap == *bp); 323 bp += 1; 324 } 325 /* End-of-wildcard. */ 326 else if (ch == IP_MATCH_CODE_CLOSE) { 327 break; 328 } 329 /* Corruption. */ 330 else { 331 size_t len = (const char *) bp - byte_codes - 1; 332 333 msg_panic("%s: unexpected byte code (decimal %d) " 334 "after \"%s\"", myname, ch, 335 ip_match_print_code_prefix(byte_codes, len)); 336 } 337 } 338 if (matched == 0) 339 return (0); 340 } 341 342 /* 343 * Corruption. 344 */ 345 else { 346 size_t len = (const char *) bp - byte_codes - 1; 347 348 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 349 myname, ch, ip_match_print_code_prefix(byte_codes, len)); 350 } 351 } 352 return (1); 353} 354 355/* ip_match_next_token - carve out the next token from input pattern */ 356 357static int ip_match_next_token(char **pstart, char **psaved_start, int *poval) 358{ 359 unsigned char *cp; 360 int oval; /* octet value */ 361 int type; /* token value */ 362 363 /* 364 * Return a literal, error, or EOF token. Update the read pointer to the 365 * start of the next token or leave it at the string terminator. 366 */ 367#define IP_MATCH_RETURN_TOK(next, type) \ 368 do { *pstart = (char *) (next); return (type); } while (0) 369 370 /* 371 * Return a token that contains an IPv4 address octet value. 372 */ 373#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \ 374 *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \ 375 } while (0) 376 377 /* 378 * Light-weight tokenizer. Each result is an IPv4 address octet value, a 379 * literal character value, error, or EOF. 380 */ 381 *psaved_start = *pstart; 382 cp = (unsigned char *) *pstart; 383 if (ISDIGIT(*cp)) { 384 oval = *cp - '0'; 385 type = IP_MATCH_CODE_OVAL; 386 for (cp += 1; ISDIGIT(*cp); cp++) { 387 oval *= 10; 388 oval += *cp - '0'; 389 if (oval > 255) 390 type = IP_MATCH_CODE_ERR; 391 } 392 IP_MATCH_RETURN_TOK_VAL(cp, type, oval); 393 } else { 394 IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp); 395 } 396} 397 398/* ipmatch_print_parse_error - formatted parsing error, with context */ 399 400static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply, 401 char *start, 402 char *here, 403 char *next, 404 const char *fmt,...) 405{ 406 va_list ap; 407 int start_width; 408 int here_width; 409 410 /* 411 * Format the error type. 412 */ 413 va_start(ap, fmt); 414 vstring_vsprintf(reply, fmt, ap); 415 va_end(ap); 416 417 /* 418 * Format the error context. The syntax is complex enough that it is 419 * worth the effort to precisely indicate what input is in error. 420 * 421 * XXX Workaround for %.*s to avoid output when a zero width is specified. 422 */ 423 if (start != 0) { 424 start_width = here - start; 425 here_width = next - here; 426 vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"", 427 start_width, start_width == 0 ? "" : start, 428 here_width, here_width == 0 ? "" : here, next); 429 } 430} 431 432/* ip_match_parse - parse an entire wild-card address pattern */ 433 434char *ip_match_parse(VSTRING *byte_codes, char *pattern) 435{ 436 int octet_count; 437 char *saved_cp; 438 char *cp; 439 int token_type; 440 int look_ahead; 441 int oval; 442 int saved_oval; 443 444 /* 445 * Simplify this if we change to {} for wildcard notation. 446 */ 447#define FIND_TERMINATOR(start, cp) do { \ 448 int _level = 0; \ 449 for (cp = (start) ; *cp; cp++) { \ 450 if (*cp == '[') _level++; \ 451 if (*cp != ']') continue; \ 452 if (--_level == 0) break; \ 453 } \ 454 } while (0) 455 456 /* 457 * Strip [] around the entire pattern. 458 */ 459 if (*pattern == '[') { 460 FIND_TERMINATOR(pattern, cp); 461 if (cp[0] == 0) { 462 vstring_sprintf(byte_codes, "missing \"]\" character"); 463 return (STR(byte_codes)); 464 } 465 if (cp[1] == 0) { 466 *cp = 0; 467 pattern += 1; 468 } 469 } 470 471 /* 472 * Sanity check. In this case we can't show any error context. 473 */ 474 if (*pattern == 0) { 475 vstring_sprintf(byte_codes, "empty address pattern"); 476 return (STR(byte_codes)); 477 } 478 479 /* 480 * Simple parser with on-the-fly encoding. For now, IPv4 support only. 481 * Use different parser loops for IPv4 and IPv6. 482 */ 483 VSTRING_RESET(byte_codes); 484 VSTRING_ADDCH(byte_codes, AF_INET); 485 octet_count = 0; 486 cp = pattern; 487 488 /* 489 * Require four address fields separated by ".", each field containing a 490 * numeric octet value or a sequence inside []. The loop head has no test 491 * and does not step the loop variable. The tokenizer advances the loop 492 * variable, and the loop termination logic is inside the loop. 493 */ 494 for (;;) { 495 switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) { 496 497 /* 498 * Numeric address field. 499 */ 500 case IP_MATCH_CODE_OVAL: 501 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 502 VSTRING_ADDCH(byte_codes, oval); 503 break; 504 505 /* 506 * Wild-card address field. 507 */ 508 case IP_MATCH_CODE_OPEN: 509 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN); 510 /* Require ";"-separated numbers or numeric ranges. */ 511 for (;;) { 512 token_type = ip_match_next_token(&cp, &saved_cp, &oval); 513 if (token_type == IP_MATCH_CODE_OVAL) { 514 saved_oval = oval; 515 look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); 516 /* Numeric range. */ 517 if (look_ahead == '.') { 518 /* Brute-force parsing. */ 519 if (ip_match_next_token(&cp, &saved_cp, &oval) == '.' 520 && ip_match_next_token(&cp, &saved_cp, &oval) 521 == IP_MATCH_CODE_OVAL 522 && saved_oval <= oval) { 523 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE); 524 VSTRING_ADDCH(byte_codes, saved_oval); 525 VSTRING_ADDCH(byte_codes, oval); 526 look_ahead = 527 ip_match_next_token(&cp, &saved_cp, &oval); 528 } else { 529 ipmatch_print_parse_error(byte_codes, pattern, 530 saved_cp, cp, 531 "numeric range error"); 532 return (STR(byte_codes)); 533 } 534 } 535 /* Single number. */ 536 else { 537 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 538 VSTRING_ADDCH(byte_codes, saved_oval); 539 } 540 /* Require ";" or end-of-wildcard. */ 541 token_type = look_ahead; 542 if (token_type == ';') { 543 continue; 544 } else if (token_type == IP_MATCH_CODE_CLOSE) { 545 break; 546 } else { 547 ipmatch_print_parse_error(byte_codes, pattern, 548 saved_cp, cp, 549 "need \";\" or \"%c\"", 550 IP_MATCH_CODE_CLOSE); 551 return (STR(byte_codes)); 552 } 553 } else { 554 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 555 "need decimal number 0..255"); 556 return (STR(byte_codes)); 557 } 558 } 559 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE); 560 break; 561 562 /* 563 * Invalid field. 564 */ 565 default: 566 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 567 "need decimal number 0..255 or \"%c\"", 568 IP_MATCH_CODE_OPEN); 569 return (STR(byte_codes)); 570 } 571 octet_count += 1; 572 573 /* 574 * Require four address fields. Not one more, not one less. 575 */ 576 if (octet_count == 4) { 577 if (*cp != 0) { 578 (void) ip_match_next_token(&cp, &saved_cp, &oval); 579 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 580 "garbage after pattern"); 581 return (STR(byte_codes)); 582 } 583 VSTRING_ADDCH(byte_codes, 0); 584 return (0); 585 } 586 587 /* 588 * Require "." before the next address field. 589 */ 590 if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') { 591 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 592 "need \".\""); 593 return (STR(byte_codes)); 594 } 595 } 596} 597 598#ifdef TEST 599 600 /* 601 * Dummy main program for regression tests. 602 */ 603#include <sys/socket.h> 604#include <netinet/in.h> 605#include <arpa/inet.h> 606#include <stdlib.h> 607#include <unistd.h> 608#include <vstream.h> 609#include <vstring_vstream.h> 610#include <stringops.h> 611 612int main(int argc, char **argv) 613{ 614 VSTRING *byte_codes = vstring_alloc(100); 615 VSTRING *line_buf = vstring_alloc(100); 616 char *bufp; 617 char *err; 618 char *user_pattern; 619 char *user_address; 620 int echo_input = !isatty(0); 621 622 /* 623 * Iterate over the input stream. The input format is a pattern, followed 624 * by optional addresses to match against. 625 */ 626 while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) { 627 bufp = STR(line_buf); 628 if (echo_input) { 629 vstream_printf("> %s\n", bufp); 630 vstream_fflush(VSTREAM_OUT); 631 } 632 if (*bufp == '#') 633 continue; 634 if ((user_pattern = mystrtok(&bufp, " \t")) == 0) 635 continue; 636 637 /* 638 * Parse and dump the pattern. 639 */ 640 if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) { 641 vstream_printf("Error: %s\n", err); 642 } else { 643 vstream_printf("Code: %s\n", 644 ip_match_dump(line_buf, STR(byte_codes))); 645 } 646 vstream_fflush(VSTREAM_OUT); 647 648 /* 649 * Match the optional patterns. 650 */ 651 while ((user_address = mystrtok(&bufp, " \t")) != 0) { 652 struct in_addr netw_addr; 653 654 switch (inet_pton(AF_INET, user_address, &netw_addr)) { 655 case 1: 656 vstream_printf("Match %s: %s\n", user_address, 657 ip_match_execute(STR(byte_codes), 658 (char *) &netw_addr.s_addr) ? 659 "yes" : "no"); 660 break; 661 case 0: 662 vstream_printf("bad address syntax: %s\n", user_address); 663 break; 664 case -1: 665 vstream_printf("%s: %m\n", user_address); 666 break; 667 } 668 vstream_fflush(VSTREAM_OUT); 669 } 670 } 671 vstring_free(line_buf); 672 vstring_free(byte_codes); 673 exit(0); 674} 675 676#endif 677