1/*++ 2/* NAME 3/* tok822_parse 3 4/* SUMMARY 5/* RFC 822 address parser 6/* SYNOPSIS 7/* #include <tok822.h> 8/* 9/* TOK822 *tok822_scan_limit(str, tailp, limit) 10/* const char *str; 11/* TOK822 **tailp; 12/* int limit; 13/* 14/* TOK822 *tok822_scan(str, tailp) 15/* const char *str; 16/* TOK822 **tailp; 17/* 18/* TOK822 *tok822_parse_limit(str, limit) 19/* const char *str; 20/* int limit; 21/* 22/* TOK822 *tok822_parse(str) 23/* const char *str; 24/* 25/* TOK822 *tok822_scan_addr(str) 26/* const char *str; 27/* 28/* VSTRING *tok822_externalize(buffer, tree, flags) 29/* VSTRING *buffer; 30/* TOK822 *tree; 31/* int flags; 32/* 33/* VSTRING *tok822_internalize(buffer, tree, flags) 34/* VSTRING *buffer; 35/* TOK822 *tree; 36/* int flags; 37/* DESCRIPTION 38/* This module converts address lists between string form and parse 39/* tree formats. The string form can appear in two different ways: 40/* external (or quoted) form, as used in message headers, and internal 41/* (unquoted) form, as used internally by the mail software. 42/* Although RFC 822 expects 7-bit data, these routines pay no 43/* special attention to 8-bit characters. 44/* 45/* tok822_scan() converts the external-form string in \fIstr\fR 46/* to a linear token list. The \fItailp\fR argument is a null pointer 47/* or receives the pointer value of the last result list element. 48/* 49/* tok822_scan_limit() implements tok822_scan(), which is a macro. 50/* The \fIlimit\fR argument is either zero or an upper bound on the 51/* number of tokens produced. 52/* 53/* tok822_parse() converts the external-form address list in 54/* \fIstr\fR to the corresponding token tree. The parser is permissive 55/* and will not throw away information that it does not understand. 56/* The parser adds missing commas between addresses. 57/* 58/* tok822_parse_limit() implements tok822_parse(), which is a macro. 59/* The \fIlimit\fR argument is either zero or an upper bound on the 60/* number of tokens produced. 61/* 62/* tok822_scan_addr() converts the external-form string in 63/* \fIstr\fR to an address token tree. This is just string to 64/* token list conversion; no parsing is done. This routine is 65/* suitable for data that should contain just one address and no 66/* other information. 67/* 68/* tok822_externalize() converts a token list to external form. 69/* Where appropriate, characters and strings are quoted and white 70/* space is inserted. The \fIflags\fR argument is the binary OR of 71/* zero or more of the following: 72/* .IP TOK822_STR_WIPE 73/* Initially, truncate the result to zero length. 74/* .IP TOK822_STR_TERM 75/* Append a null terminator to the result when done. 76/* .IP TOK822_STR_LINE 77/* Append a line break after each comma token, instead of appending 78/* whitespace. It is up to the caller to concatenate short lines to 79/* produce longer ones. 80/* .IP TOK822_STR_TRNC 81/* Truncate non-address information to 250 characters per address, to 82/* protect Sendmail systems that are vulnerable to the problem in CERT 83/* advisory CA-2003-07. 84/* This flag has effect with tok822_externalize() only. 85/* .PP 86/* The macro TOK_822_NONE expresses that none of the above features 87/* should be activated. 88/* 89/* The macro TOK822_STR_DEFL combines the TOK822_STR_WIPE and 90/* TOK822_STR_TERM flags. This is useful for most token to string 91/* conversions. 92/* 93/* The macro TOK822_STR_HEAD combines the TOK822_STR_TERM, 94/* TOK822_STR_LINE and TOK822_STR_TRNC flags. This is useful for 95/* the special case of token to mail header conversion. 96/* 97/* tok822_internalize() converts a token list to string form, 98/* without quoting. White space is inserted where appropriate. 99/* The \fIflags\fR argument is as with tok822_externalize(). 100/* STANDARDS 101/* .ad 102/* .fi 103/* RFC 822 (ARPA Internet Text Messages). In addition to this standard 104/* this module implements additional operators such as % and !. These 105/* are needed because the real world is not all RFC 822. Also, the ':' 106/* operator is allowed to appear inside addresses, to accommodate DECnet. 107/* In addition, 8-bit data is not given special treatment. 108/* LICENSE 109/* .ad 110/* .fi 111/* The Secure Mailer license must be distributed with this software. 112/* AUTHOR(S) 113/* Wietse Venema 114/* IBM T.J. Watson Research 115/* P.O. Box 704 116/* Yorktown Heights, NY 10598, USA 117/*--*/ 118 119/* System library. */ 120 121#include <sys_defs.h> 122#include <ctype.h> 123#include <string.h> 124 125/* Utility library. */ 126 127#include <vstring.h> 128#include <msg.h> 129#include <stringops.h> 130 131/* Global library. */ 132 133#include "lex_822.h" 134#include "quote_822_local.h" 135#include "tok822.h" 136 137 /* 138 * I suppose this is my favorite macro. Used heavily for tokenizing. 139 */ 140#define COLLECT(t,s,c,cond) { \ 141 while ((c = *(unsigned char *) s) != 0) { \ 142 if (c == '\\') { \ 143 if ((c = *(unsigned char *)++s) == 0) \ 144 break; \ 145 } else if (!(cond)) { \ 146 break; \ 147 } \ 148 VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \ 149 s++; \ 150 } \ 151 VSTRING_TERMINATE(t->vstr); \ 152 } 153 154#define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; } 155 156 /* 157 * Not quite as complex. The parser depends heavily on it. 158 */ 159#define SKIP(tp, cond) { \ 160 while (tp->type && (cond)) \ 161 tp = tp->prev; \ 162 } 163 164#define MOVE_COMMENT_AND_CONTINUE(tp, right) { \ 165 TOK822 *prev = tok822_unlink(tp); \ 166 right = tok822_prepend(right, tp); \ 167 tp = prev; \ 168 continue; \ 169 } 170 171#define SKIP_MOVE_COMMENT(tp, cond, right) { \ 172 while (tp->type && (cond)) { \ 173 if (tp->type == TOK822_COMMENT) \ 174 MOVE_COMMENT_AND_CONTINUE(tp, right); \ 175 tp = tp->prev; \ 176 } \ 177 } 178 179 /* 180 * Single-character operators. We include the % and ! operators because not 181 * all the world is RFC822. XXX Make this operator list configurable when we 182 * have a real rewriting language. Include | for aliases file parsing. 183 */ 184static char tok822_opchar[] = "|%!" LEX_822_SPECIALS; 185static void tok822_quote_atom(TOK822 *); 186static const char *tok822_comment(TOK822 *, const char *); 187static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int); 188static void tok822_copy_quoted(VSTRING *, char *, char *); 189static int tok822_append_space(TOK822 *); 190 191#define DO_WORD (1<<0) /* finding a word is ok here */ 192#define DO_GROUP (1<<1) /* doing an address group */ 193 194#define ADD_COMMA ',' /* resynchronize */ 195#define NO_MISSING_COMMA 0 196 197/* tok822_internalize - token tree to string, internal form */ 198 199VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags) 200{ 201 TOK822 *tp; 202 203 if (flags & TOK822_STR_WIPE) 204 VSTRING_RESET(vp); 205 206 for (tp = tree; tp; tp = tp->next) { 207 switch (tp->type) { 208 case ',': 209 VSTRING_ADDCH(vp, tp->type); 210 if (flags & TOK822_STR_LINE) { 211 VSTRING_ADDCH(vp, '\n'); 212 continue; 213 } 214 break; 215 case TOK822_ADDR: 216 tok822_internalize(vp, tp->head, TOK822_STR_NONE); 217 break; 218 case TOK822_COMMENT: 219 case TOK822_ATOM: 220 case TOK822_QSTRING: 221 vstring_strcat(vp, vstring_str(tp->vstr)); 222 break; 223 case TOK822_DOMLIT: 224 VSTRING_ADDCH(vp, '['); 225 vstring_strcat(vp, vstring_str(tp->vstr)); 226 VSTRING_ADDCH(vp, ']'); 227 break; 228 case TOK822_STARTGRP: 229 VSTRING_ADDCH(vp, ':'); 230 break; 231 default: 232 if (tp->type >= TOK822_MINTOK) 233 msg_panic("tok822_internalize: unknown operator %d", tp->type); 234 VSTRING_ADDCH(vp, tp->type); 235 } 236 if (tok822_append_space(tp)) 237 VSTRING_ADDCH(vp, ' '); 238 } 239 if (flags & TOK822_STR_TERM) 240 VSTRING_TERMINATE(vp); 241 return (vp); 242} 243 244/* strip_address - strip non-address text from address expression */ 245 246static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr) 247{ 248 VSTRING *tmp; 249 250 /* 251 * Emit plain <address>. Discard any comments or phrases. 252 */ 253 VSTRING_TERMINATE(vp); 254 msg_warn("stripping too many comments from address: %.100s...", 255 printable(vstring_str(vp) + start, '?')); 256 vstring_truncate(vp, start); 257 VSTRING_ADDCH(vp, '<'); 258 if (addr) { 259 tmp = vstring_alloc(100); 260 tok822_internalize(tmp, addr, TOK822_STR_TERM); 261 quote_822_local_flags(vp, vstring_str(tmp), 262 QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); 263 vstring_free(tmp); 264 } 265 VSTRING_ADDCH(vp, '>'); 266} 267 268/* tok822_externalize - token tree to string, external form */ 269 270VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags) 271{ 272 VSTRING *tmp; 273 TOK822 *tp; 274 ssize_t start; 275 TOK822 *addr; 276 ssize_t addr_len; 277 278 /* 279 * Guard against a Sendmail buffer overflow (CERT advisory CA-2003-07). 280 * The problem was that Sendmail could store too much non-address text 281 * (comments, phrases, etc.) into a static 256-byte buffer. 282 * 283 * When the buffer fills up, fixed Sendmail versions remove comments etc. 284 * and reduce the information to just <$g>, which expands to <address>. 285 * No change is made when an address expression (text separated by 286 * commas) contains no address. This fix reportedly also protects 287 * Sendmail systems that are still vulnerable to this problem. 288 * 289 * Postfix takes the same approach, grudgingly. To avoid unnecessary damage, 290 * Postfix removes comments etc. only when the amount of non-address text 291 * in an address expression (text separated by commas) exceeds 250 bytes. 292 * 293 * With Sendmail, the address part of an address expression is the 294 * right-most <> instance in that expression. If an address expression 295 * contains no <>, then Postfix guarantees that it contains at most one 296 * non-comment string; that string is the address part of the address 297 * expression, so there is no ambiguity. 298 * 299 * Finally, we note that stress testing shows that other code in Sendmail 300 * 8.12.8 bluntly truncates ``text <address>'' to 256 bytes even when 301 * this means chopping the <address> somewhere in the middle. This is a 302 * loss of control that we're not entirely comfortable with. However, 303 * unbalanced quotes and dangling backslash do not seem to influence the 304 * way that Sendmail parses headers, so this is not an urgent problem. 305 */ 306#define MAX_NONADDR_LENGTH 250 307 308#define RESET_NONADDR_LENGTH { \ 309 start = VSTRING_LEN(vp); \ 310 addr = 0; \ 311 addr_len = 0; \ 312 } 313 314#define ENFORCE_NONADDR_LENGTH do { \ 315 if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \ 316 strip_address(vp, start, addr->head); \ 317 } while(0) 318 319 if (flags & TOK822_STR_WIPE) 320 VSTRING_RESET(vp); 321 322 if (flags & TOK822_STR_TRNC) 323 RESET_NONADDR_LENGTH; 324 325 for (tp = tree; tp; tp = tp->next) { 326 switch (tp->type) { 327 case ',': 328 if (flags & TOK822_STR_TRNC) 329 ENFORCE_NONADDR_LENGTH; 330 VSTRING_ADDCH(vp, tp->type); 331 VSTRING_ADDCH(vp, (flags & TOK822_STR_LINE) ? '\n' : ' '); 332 if (flags & TOK822_STR_TRNC) 333 RESET_NONADDR_LENGTH; 334 continue; 335 336 /* 337 * XXX In order to correctly externalize an address, it is not 338 * sufficient to quote individual atoms. There are higher-level 339 * rules that say when an address localpart needs to be quoted. 340 * We wing it with the quote_822_local() routine, which ignores 341 * the issue of atoms in the domain part that would need quoting. 342 */ 343 case TOK822_ADDR: 344 addr = tp; 345 tmp = vstring_alloc(100); 346 tok822_internalize(tmp, tp->head, TOK822_STR_TERM); 347 addr_len = VSTRING_LEN(vp); 348 quote_822_local_flags(vp, vstring_str(tmp), 349 QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); 350 addr_len = VSTRING_LEN(vp) - addr_len; 351 vstring_free(tmp); 352 break; 353 case TOK822_ATOM: 354 case TOK822_COMMENT: 355 vstring_strcat(vp, vstring_str(tp->vstr)); 356 break; 357 case TOK822_QSTRING: 358 VSTRING_ADDCH(vp, '"'); 359 tok822_copy_quoted(vp, vstring_str(tp->vstr), "\"\\\r\n"); 360 VSTRING_ADDCH(vp, '"'); 361 break; 362 case TOK822_DOMLIT: 363 VSTRING_ADDCH(vp, '['); 364 tok822_copy_quoted(vp, vstring_str(tp->vstr), "\\\r\n"); 365 VSTRING_ADDCH(vp, ']'); 366 break; 367 case TOK822_STARTGRP: 368 VSTRING_ADDCH(vp, ':'); 369 break; 370 case '<': 371 if (tp->next && tp->next->type == '>') { 372 addr = tp; 373 addr_len = 0; 374 } 375 VSTRING_ADDCH(vp, '<'); 376 break; 377 default: 378 if (tp->type >= TOK822_MINTOK) 379 msg_panic("tok822_externalize: unknown operator %d", tp->type); 380 VSTRING_ADDCH(vp, tp->type); 381 } 382 if (tok822_append_space(tp)) 383 VSTRING_ADDCH(vp, ' '); 384 } 385 if (flags & TOK822_STR_TRNC) 386 ENFORCE_NONADDR_LENGTH; 387 388 if (flags & TOK822_STR_TERM) 389 VSTRING_TERMINATE(vp); 390 return (vp); 391} 392 393/* tok822_copy_quoted - copy a string while quoting */ 394 395static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set) 396{ 397 int ch; 398 399 while ((ch = *(unsigned char *) str++) != 0) { 400 if (strchr(quote_set, ch)) 401 VSTRING_ADDCH(vp, '\\'); 402 VSTRING_ADDCH(vp, ch); 403 } 404} 405 406/* tok822_append_space - see if space is needed after this token */ 407 408static int tok822_append_space(TOK822 *tp) 409{ 410 TOK822 *next; 411 412 if (tp == 0 || (next = tp->next) == 0 || tp->owner != 0) 413 return (0); 414 if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<') 415 return (1); 416 417#define NON_OPERATOR(x) \ 418 (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \ 419 || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \ 420 || x->type == TOK822_ADDR) 421 422 return (NON_OPERATOR(tp) && NON_OPERATOR(next)); 423} 424 425/* tok822_scan_limit - tokenize string */ 426 427TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit) 428{ 429 TOK822 *head = 0; 430 TOK822 *tail = 0; 431 TOK822 *tp; 432 int ch; 433 int tok_count = 0; 434 435 /* 436 * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to 437 * allow for forms such as: Johnny B. Goode <johhny@domain.org>. I cannot 438 * handle that at the tokenizer level - it is not context sensitive. And 439 * to fix this at the parser level requires radical changes to preserve 440 * white space as part of the token stream. Thanks a lot, people. 441 */ 442 while ((ch = *(unsigned char *) str++) != 0) { 443 if (IS_SPACE_TAB_CR_LF(ch)) 444 continue; 445 if (ch == '(') { 446 tp = tok822_alloc(TOK822_COMMENT, (char *) 0); 447 str = tok822_comment(tp, str); 448 } else if (ch == '[') { 449 tp = tok822_alloc(TOK822_DOMLIT, (char *) 0); 450 COLLECT_SKIP_LAST(tp, str, ch, ch != ']'); 451 } else if (ch == '"') { 452 tp = tok822_alloc(TOK822_QSTRING, (char *) 0); 453 COLLECT_SKIP_LAST(tp, str, ch, ch != '"'); 454 } else if (ch != '\\' && strchr(tok822_opchar, ch)) { 455 tp = tok822_alloc(ch, (char *) 0); 456 } else { 457 tp = tok822_alloc(TOK822_ATOM, (char *) 0); 458 str -= 1; /* \ may be first */ 459 COLLECT(tp, str, ch, !IS_SPACE_TAB_CR_LF(ch) && !strchr(tok822_opchar, ch)); 460 tok822_quote_atom(tp); 461 } 462 if (head == 0) { 463 head = tail = tp; 464 while (tail->next) 465 tail = tail->next; 466 } else { 467 tail = tok822_append(tail, tp); 468 } 469 if (tok_count_limit > 0 && ++tok_count >= tok_count_limit) 470 break; 471 } 472 if (tailp) 473 *tailp = tail; 474 return (head); 475} 476 477/* tok822_parse_limit - translate external string to token tree */ 478 479TOK822 *tok822_parse_limit(const char *str, int tok_count_limit) 480{ 481 TOK822 *head; 482 TOK822 *tail; 483 TOK822 *right; 484 TOK822 *first_token; 485 TOK822 *last_token; 486 TOK822 *tp; 487 int state; 488 489 /* 490 * First, tokenize the string, from left to right. We are not allowed to 491 * throw away any information that we do not understand. With a flat 492 * token list that contains all tokens, we can always convert back to 493 * string form. 494 */ 495 if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0) 496 return (0); 497 498 /* 499 * For convenience, sandwich the token list between two sentinel tokens. 500 */ 501#define GLUE(left,rite) { left->next = rite; rite->prev = left; } 502 503 head = tok822_alloc(0, (char *) 0); 504 GLUE(head, first_token); 505 tail = tok822_alloc(0, (char *) 0); 506 GLUE(last_token, tail); 507 508 /* 509 * Next step is to transform the token list into a parse tree. This is 510 * done most conveniently from right to left. If there is something that 511 * we do not understand, just leave it alone, don't throw it away. The 512 * address information that we're looking for sits in-between the current 513 * node (tp) and the one called right. Add missing commas on the fly. 514 */ 515 state = DO_WORD; 516 right = tail; 517 tp = tail->prev; 518 while (tp->type) { 519 if (tp->type == TOK822_COMMENT) { /* move comment to the side */ 520 MOVE_COMMENT_AND_CONTINUE(tp, right); 521 } else if (tp->type == ';') { /* rh side of named group */ 522 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); 523 state = DO_GROUP | DO_WORD; 524 } else if (tp->type == ':' && (state & DO_GROUP) != 0) { 525 tp->type = TOK822_STARTGRP; 526 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 527 SKIP(tp, tp->type != ','); 528 right = tp; 529 continue; 530 } else if (tp->type == '>') { /* rh side of <route> */ 531 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); 532 SKIP_MOVE_COMMENT(tp, tp->type != '<', right); 533 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 534 SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0); 535 right = tp; 536 state |= DO_WORD; 537 continue; 538 } else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING 539 || tp->type == TOK822_DOMLIT) { 540 if ((state & DO_WORD) == 0) 541 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next; 542 state &= ~DO_WORD; 543 } else if (tp->type == ',') { 544 right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 545 state |= DO_WORD; 546 } else { 547 state |= DO_WORD; 548 } 549 tp = tp->prev; 550 } 551 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 552 553 /* 554 * Discard the sentinel tokens on the left and right extremes. Properly 555 * terminate the resulting list. 556 */ 557 tp = (head->next != tail ? head->next : 0); 558 tok822_cut_before(head->next); 559 tok822_free(head); 560 tok822_cut_before(tail); 561 tok822_free(tail); 562 return (tp); 563} 564 565/* tok822_quote_atom - see if an atom needs quoting when externalized */ 566 567static void tok822_quote_atom(TOK822 *tp) 568{ 569 char *cp; 570 int ch; 571 572 /* 573 * RFC 822 expects 7-bit data. Rather than quoting every 8-bit character 574 * (and still passing it on as 8-bit data) we leave 8-bit data alone. 575 */ 576 for (cp = vstring_str(tp->vstr); (ch = *(unsigned char *) cp) != 0; cp++) { 577 if ( /* !ISASCII(ch) || */ ch == ' ' 578 || ISCNTRL(ch) || strchr(tok822_opchar, ch)) { 579 tp->type = TOK822_QSTRING; 580 break; 581 } 582 } 583} 584 585/* tok822_comment - tokenize comment */ 586 587static const char *tok822_comment(TOK822 *tp, const char *str) 588{ 589 int level = 1; 590 int ch; 591 592 /* 593 * XXX We cheat by storing comments in their external form. Otherwise it 594 * would be a royal pain to preserve \ before (. That would require a 595 * recursive parser; the easy to implement stack-based recursion would be 596 * too expensive. 597 */ 598 VSTRING_ADDCH(tp->vstr, '('); 599 600 while ((ch = *(unsigned char *) str) != 0) { 601 VSTRING_ADDCH(tp->vstr, ch); 602 str++; 603 if (ch == '(') { /* comments can nest! */ 604 level++; 605 } else if (ch == ')') { 606 if (--level == 0) 607 break; 608 } else if (ch == '\\') { 609 if ((ch = *(unsigned char *) str) == 0) 610 break; 611 VSTRING_ADDCH(tp->vstr, ch); 612 str++; 613 } 614 } 615 VSTRING_TERMINATE(tp->vstr); 616 return (str); 617} 618 619/* tok822_group - cluster a group of tokens */ 620 621static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type) 622{ 623 TOK822 *group; 624 TOK822 *sync; 625 TOK822 *first; 626 627 /* 628 * Cluster the tokens between left and right under their own parse tree 629 * node. Optionally insert a resync token. 630 */ 631 if (left != right && (first = left->next) != right) { 632 tok822_cut_before(right); 633 tok822_cut_before(first); 634 group = tok822_alloc(group_type, (char *) 0); 635 tok822_sub_append(group, first); 636 tok822_append(left, group); 637 tok822_append(group, right); 638 if (sync_type) { 639 sync = tok822_alloc(sync_type, (char *) 0); 640 tok822_append(left, sync); 641 } 642 } 643 return (left); 644} 645 646/* tok822_scan_addr - convert external address string to address token */ 647 648TOK822 *tok822_scan_addr(const char *addr) 649{ 650 TOK822 *tree = tok822_alloc(TOK822_ADDR, (char *) 0); 651 652 tree->head = tok822_scan(addr, &tree->tail); 653 return (tree); 654} 655 656#ifdef TEST 657 658#include <unistd.h> 659#include <vstream.h> 660#include <readlline.h> 661 662/* tok822_print - display token */ 663 664static void tok822_print(TOK822 *list, int indent) 665{ 666 TOK822 *tp; 667 668 for (tp = list; tp; tp = tp->next) { 669 if (tp->type < TOK822_MINTOK) { 670 vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type); 671 } else if (tp->type == TOK822_ADDR) { 672 vstream_printf("%*s %s\n", indent, "", "address"); 673 tok822_print(tp->head, indent + 2); 674 } else if (tp->type == TOK822_STARTGRP) { 675 vstream_printf("%*s %s\n", indent, "", "group \":\""); 676 } else { 677 vstream_printf("%*s %s \"%s\"\n", indent, "", 678 tp->type == TOK822_COMMENT ? "comment" : 679 tp->type == TOK822_ATOM ? "atom" : 680 tp->type == TOK822_QSTRING ? "quoted string" : 681 tp->type == TOK822_DOMLIT ? "domain literal" : 682 tp->type == TOK822_ADDR ? "address" : 683 "unknown\n", vstring_str(tp->vstr)); 684 } 685 } 686} 687 688int main(int unused_argc, char **unused_argv) 689{ 690 VSTRING *vp = vstring_alloc(100); 691 TOK822 *list; 692 VSTRING *buf = vstring_alloc(100); 693 694#define TEST_TOKEN_LIMIT 20 695 696 while (readlline(buf, VSTREAM_IN, (int *) 0)) { 697 while (VSTRING_LEN(buf) > 0 && vstring_end(buf)[-1] == '\n') { 698 vstring_end(buf)[-1] = 0; 699 vstring_truncate(buf, VSTRING_LEN(buf) - 1); 700 } 701 if (!isatty(vstream_fileno(VSTREAM_IN))) 702 vstream_printf(">>>%s<<<\n\n", vstring_str(buf)); 703 list = tok822_parse_limit(vstring_str(buf), TEST_TOKEN_LIMIT); 704 vstream_printf("Parse tree:\n"); 705 tok822_print(list, 0); 706 vstream_printf("\n"); 707 708 vstream_printf("Internalized:\n%s\n\n", 709 vstring_str(tok822_internalize(vp, list, TOK822_STR_DEFL))); 710 vstream_fflush(VSTREAM_OUT); 711 vstream_printf("Externalized, no newlines inserted:\n%s\n\n", 712 vstring_str(tok822_externalize(vp, list, 713 TOK822_STR_DEFL | TOK822_STR_TRNC))); 714 vstream_fflush(VSTREAM_OUT); 715 vstream_printf("Externalized, newlines inserted:\n%s\n\n", 716 vstring_str(tok822_externalize(vp, list, 717 TOK822_STR_DEFL | TOK822_STR_LINE | TOK822_STR_TRNC))); 718 vstream_fflush(VSTREAM_OUT); 719 tok822_free_tree(list); 720 } 721 vstring_free(vp); 722 vstring_free(buf); 723 return (0); 724} 725 726#endif 727