1// -*- C++ -*- 2/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2004 3 Free Software Foundation, Inc. 4 Written by James Clark (jjc@jclark.com) 5 6This file is part of groff. 7 8groff is free software; you can redistribute it and/or modify it under 9the terms of the GNU General Public License as published by the Free 10Software Foundation; either version 2, or (at your option) any later 11version. 12 13groff is distributed in the hope that it will be useful, but WITHOUT ANY 14WARRANTY; without even the implied warranty of MERCHANTABILITY or 15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16for more details. 17 18You should have received a copy of the GNU General Public License along 19with groff; see the file COPYING. If not, write to the Free Software 20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22#include "refer.h" 23#include "refid.h" 24#include "search.h" 25#include "command.h" 26 27cset cs_field_name = csalpha; 28 29class input_item { 30 input_item *next; 31 char *filename; 32 int first_lineno; 33 string buffer; 34 const char *ptr; 35 const char *end; 36public: 37 input_item(string &, const char *, int = 1); 38 ~input_item(); 39 int get_char(); 40 int peek_char(); 41 void skip_char(); 42 int get_location(const char **, int *); 43 44 friend class input_stack; 45}; 46 47input_item::input_item(string &s, const char *fn, int ln) 48: filename(strsave(fn)), first_lineno(ln) 49{ 50 buffer.move(s); 51 ptr = buffer.contents(); 52 end = ptr + buffer.length(); 53} 54 55input_item::~input_item() 56{ 57 a_delete filename; 58} 59 60inline int input_item::peek_char() 61{ 62 if (ptr >= end) 63 return EOF; 64 else 65 return (unsigned char)*ptr; 66} 67 68inline int input_item::get_char() 69{ 70 if (ptr >= end) 71 return EOF; 72 else 73 return (unsigned char)*ptr++; 74} 75 76inline void input_item::skip_char() 77{ 78 ptr++; 79} 80 81int input_item::get_location(const char **filenamep, int *linenop) 82{ 83 *filenamep = filename; 84 if (ptr == buffer.contents()) 85 *linenop = first_lineno; 86 else { 87 int ln = first_lineno; 88 const char *e = ptr - 1; 89 for (const char *p = buffer.contents(); p < e; p++) 90 if (*p == '\n') 91 ln++; 92 *linenop = ln; 93 } 94 return 1; 95} 96 97class input_stack { 98 static input_item *top; 99public: 100 static void init(); 101 static int get_char(); 102 static int peek_char(); 103 static void skip_char() { top->skip_char(); } 104 static void push_file(const char *); 105 static void push_string(string &, const char *, int); 106 static void error(const char *format, 107 const errarg &arg1 = empty_errarg, 108 const errarg &arg2 = empty_errarg, 109 const errarg &arg3 = empty_errarg); 110}; 111 112input_item *input_stack::top = 0; 113 114void input_stack::init() 115{ 116 while (top) { 117 input_item *tem = top; 118 top = top->next; 119 delete tem; 120 } 121} 122 123int input_stack::get_char() 124{ 125 while (top) { 126 int c = top->get_char(); 127 if (c >= 0) 128 return c; 129 input_item *tem = top; 130 top = top->next; 131 delete tem; 132 } 133 return -1; 134} 135 136int input_stack::peek_char() 137{ 138 while (top) { 139 int c = top->peek_char(); 140 if (c >= 0) 141 return c; 142 input_item *tem = top; 143 top = top->next; 144 delete tem; 145 } 146 return -1; 147} 148 149void input_stack::push_file(const char *fn) 150{ 151 FILE *fp; 152 if (strcmp(fn, "-") == 0) { 153 fp = stdin; 154 fn = "<standard input>"; 155 } 156 else { 157 errno = 0; 158 fp = fopen(fn, "r"); 159 if (fp == 0) { 160 error("can't open `%1': %2", fn, strerror(errno)); 161 return; 162 } 163 } 164 string buf; 165 int bol = 1; 166 int lineno = 1; 167 for (;;) { 168 int c = getc(fp); 169 if (bol && c == '.') { 170 // replace lines beginning with .R1 or .R2 with a blank line 171 c = getc(fp); 172 if (c == 'R') { 173 c = getc(fp); 174 if (c == '1' || c == '2') { 175 int cc = c; 176 c = getc(fp); 177 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 178 while (c != '\n' && c != EOF) 179 c = getc(fp); 180 } 181 else { 182 buf += '.'; 183 buf += 'R'; 184 buf += cc; 185 } 186 } 187 else { 188 buf += '.'; 189 buf += 'R'; 190 } 191 } 192 else 193 buf += '.'; 194 } 195 if (c == EOF) 196 break; 197 if (invalid_input_char(c)) 198 error_with_file_and_line(fn, lineno, 199 "invalid input character code %1", int(c)); 200 else { 201 buf += c; 202 if (c == '\n') { 203 bol = 1; 204 lineno++; 205 } 206 else 207 bol = 0; 208 } 209 } 210 if (fp != stdin) 211 fclose(fp); 212 if (buf.length() > 0 && buf[buf.length() - 1] != '\n') 213 buf += '\n'; 214 input_item *it = new input_item(buf, fn); 215 it->next = top; 216 top = it; 217} 218 219void input_stack::push_string(string &s, const char *filename, int lineno) 220{ 221 input_item *it = new input_item(s, filename, lineno); 222 it->next = top; 223 top = it; 224} 225 226void input_stack::error(const char *format, const errarg &arg1, 227 const errarg &arg2, const errarg &arg3) 228{ 229 const char *filename; 230 int lineno; 231 for (input_item *it = top; it; it = it->next) 232 if (it->get_location(&filename, &lineno)) { 233 error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3); 234 return; 235 } 236 ::error(format, arg1, arg2, arg3); 237} 238 239void command_error(const char *format, const errarg &arg1, 240 const errarg &arg2, const errarg &arg3) 241{ 242 input_stack::error(format, arg1, arg2, arg3); 243} 244 245// # not recognized in "" 246// \<newline> is recognized in "" 247// # does not conceal newline 248// if missing closing quote, word extends to end of line 249// no special treatment of \ other than before newline 250// \<newline> not recognized after # 251// ; allowed as alternative to newline 252// ; not recognized in "" 253// don't clear word_buffer; just append on 254// return -1 for EOF, 0 for newline, 1 for word 255 256int get_word(string &word_buffer) 257{ 258 int c = input_stack::get_char(); 259 for (;;) { 260 if (c == '#') { 261 do { 262 c = input_stack::get_char(); 263 } while (c != '\n' && c != EOF); 264 break; 265 } 266 if (c == '\\' && input_stack::peek_char() == '\n') 267 input_stack::skip_char(); 268 else if (c != ' ' && c != '\t') 269 break; 270 c = input_stack::get_char(); 271 } 272 if (c == EOF) 273 return -1; 274 if (c == '\n' || c == ';') 275 return 0; 276 if (c == '"') { 277 for (;;) { 278 c = input_stack::peek_char(); 279 if (c == EOF || c == '\n') 280 break; 281 input_stack::skip_char(); 282 if (c == '"') { 283 int d = input_stack::peek_char(); 284 if (d == '"') 285 input_stack::skip_char(); 286 else 287 break; 288 } 289 else if (c == '\\') { 290 int d = input_stack::peek_char(); 291 if (d == '\n') 292 input_stack::skip_char(); 293 else 294 word_buffer += '\\'; 295 } 296 else 297 word_buffer += c; 298 } 299 return 1; 300 } 301 word_buffer += c; 302 for (;;) { 303 c = input_stack::peek_char(); 304 if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';') 305 break; 306 input_stack::skip_char(); 307 if (c == '\\') { 308 int d = input_stack::peek_char(); 309 if (d == '\n') 310 input_stack::skip_char(); 311 else 312 word_buffer += '\\'; 313 } 314 else 315 word_buffer += c; 316 } 317 return 1; 318} 319 320union argument { 321 const char *s; 322 int n; 323}; 324 325// This is for debugging. 326 327static void echo_command(int argc, argument *argv) 328{ 329 for (int i = 0; i < argc; i++) 330 fprintf(stderr, "%s\n", argv[i].s); 331} 332 333static void include_command(int argc, argument *argv) 334{ 335 assert(argc == 1); 336 input_stack::push_file(argv[0].s); 337} 338 339static void capitalize_command(int argc, argument *argv) 340{ 341 if (argc > 0) 342 capitalize_fields = argv[0].s; 343 else 344 capitalize_fields.clear(); 345} 346 347static void accumulate_command(int, argument *) 348{ 349 accumulate = 1; 350} 351 352static void no_accumulate_command(int, argument *) 353{ 354 accumulate = 0; 355} 356 357static void move_punctuation_command(int, argument *) 358{ 359 move_punctuation = 1; 360} 361 362static void no_move_punctuation_command(int, argument *) 363{ 364 move_punctuation = 0; 365} 366 367static void sort_command(int argc, argument *argv) 368{ 369 if (argc == 0) 370 sort_fields = "AD"; 371 else 372 sort_fields = argv[0].s; 373 accumulate = 1; 374} 375 376static void no_sort_command(int, argument *) 377{ 378 sort_fields.clear(); 379} 380 381static void articles_command(int argc, argument *argv) 382{ 383 articles.clear(); 384 int i; 385 for (i = 0; i < argc; i++) { 386 articles += argv[i].s; 387 articles += '\0'; 388 } 389 int len = articles.length(); 390 for (i = 0; i < len; i++) 391 articles[i] = cmlower(articles[i]); 392} 393 394static void database_command(int argc, argument *argv) 395{ 396 for (int i = 0; i < argc; i++) 397 database_list.add_file(argv[i].s); 398} 399 400static void default_database_command(int, argument *) 401{ 402 search_default = 1; 403} 404 405static void no_default_database_command(int, argument *) 406{ 407 search_default = 0; 408} 409 410static void bibliography_command(int argc, argument *argv) 411{ 412 const char *saved_filename = current_filename; 413 int saved_lineno = current_lineno; 414 int saved_label_in_text = label_in_text; 415 label_in_text = 0; 416 if (!accumulate) 417 fputs(".]<\n", stdout); 418 for (int i = 0; i < argc; i++) 419 do_bib(argv[i].s); 420 if (accumulate) 421 output_references(); 422 else 423 fputs(".]>\n", stdout); 424 current_filename = saved_filename; 425 current_lineno = saved_lineno; 426 label_in_text = saved_label_in_text; 427} 428 429static void annotate_command(int argc, argument *argv) 430{ 431 if (argc > 0) 432 annotation_field = argv[0].s[0]; 433 else 434 annotation_field = 'X'; 435 if (argc == 2) 436 annotation_macro = argv[1].s; 437 else 438 annotation_macro = "AP"; 439} 440 441static void no_annotate_command(int, argument *) 442{ 443 annotation_macro.clear(); 444 annotation_field = -1; 445} 446 447static void reverse_command(int, argument *argv) 448{ 449 reverse_fields = argv[0].s; 450} 451 452static void no_reverse_command(int, argument *) 453{ 454 reverse_fields.clear(); 455} 456 457static void abbreviate_command(int argc, argument *argv) 458{ 459 abbreviate_fields = argv[0].s; 460 period_before_initial = argc > 1 ? argv[1].s : ". "; 461 period_before_last_name = argc > 2 ? argv[2].s : ". "; 462 period_before_other = argc > 3 ? argv[3].s : ". "; 463 period_before_hyphen = argc > 4 ? argv[4].s : "."; 464} 465 466static void no_abbreviate_command(int, argument *) 467{ 468 abbreviate_fields.clear(); 469} 470 471string search_ignore_fields; 472 473static void search_ignore_command(int argc, argument *argv) 474{ 475 if (argc > 0) 476 search_ignore_fields = argv[0].s; 477 else 478 search_ignore_fields = "XYZ"; 479 search_ignore_fields += '\0'; 480 linear_ignore_fields = search_ignore_fields.contents(); 481} 482 483static void no_search_ignore_command(int, argument *) 484{ 485 linear_ignore_fields = ""; 486} 487 488static void search_truncate_command(int argc, argument *argv) 489{ 490 if (argc > 0) 491 linear_truncate_len = argv[0].n; 492 else 493 linear_truncate_len = 6; 494} 495 496static void no_search_truncate_command(int, argument *) 497{ 498 linear_truncate_len = -1; 499} 500 501static void discard_command(int argc, argument *argv) 502{ 503 if (argc == 0) 504 discard_fields = "XYZ"; 505 else 506 discard_fields = argv[0].s; 507 accumulate = 1; 508} 509 510static void no_discard_command(int, argument *) 511{ 512 discard_fields.clear(); 513} 514 515static void label_command(int, argument *argv) 516{ 517 set_label_spec(argv[0].s); 518} 519 520static void abbreviate_label_ranges_command(int argc, argument *argv) 521{ 522 abbreviate_label_ranges = 1; 523 label_range_indicator = argc > 0 ? argv[0].s : "-"; 524} 525 526static void no_abbreviate_label_ranges_command(int, argument *) 527{ 528 abbreviate_label_ranges = 0; 529} 530 531static void label_in_reference_command(int, argument *) 532{ 533 label_in_reference = 1; 534} 535 536static void no_label_in_reference_command(int, argument *) 537{ 538 label_in_reference = 0; 539} 540 541static void label_in_text_command(int, argument *) 542{ 543 label_in_text = 1; 544} 545 546static void no_label_in_text_command(int, argument *) 547{ 548 label_in_text = 0; 549} 550 551static void sort_adjacent_labels_command(int, argument *) 552{ 553 sort_adjacent_labels = 1; 554} 555 556static void no_sort_adjacent_labels_command(int, argument *) 557{ 558 sort_adjacent_labels = 0; 559} 560 561static void date_as_label_command(int argc, argument *argv) 562{ 563 if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*")) 564 date_as_label = 1; 565} 566 567static void no_date_as_label_command(int, argument *) 568{ 569 date_as_label = 0; 570} 571 572static void short_label_command(int, argument *argv) 573{ 574 if (set_short_label_spec(argv[0].s)) 575 short_label_flag = 1; 576} 577 578static void no_short_label_command(int, argument *) 579{ 580 short_label_flag = 0; 581} 582 583static void compatible_command(int, argument *) 584{ 585 compatible_flag = 1; 586} 587 588static void no_compatible_command(int, argument *) 589{ 590 compatible_flag = 0; 591} 592 593static void join_authors_command(int argc, argument *argv) 594{ 595 join_authors_exactly_two = argv[0].s; 596 join_authors_default = argc > 1 ? argv[1].s : argv[0].s; 597 join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s; 598} 599 600static void bracket_label_command(int, argument *argv) 601{ 602 pre_label = argv[0].s; 603 post_label = argv[1].s; 604 sep_label = argv[2].s; 605} 606 607static void separate_label_second_parts_command(int, argument *argv) 608{ 609 separate_label_second_parts = argv[0].s; 610} 611 612static void et_al_command(int argc, argument *argv) 613{ 614 et_al = argv[0].s; 615 et_al_min_elide = argv[1].n; 616 if (et_al_min_elide < 1) 617 et_al_min_elide = 1; 618 et_al_min_total = argc >= 3 ? argv[2].n : 0; 619} 620 621static void no_et_al_command(int, argument *) 622{ 623 et_al.clear(); 624 et_al_min_elide = 0; 625} 626 627typedef void (*command_t)(int, argument *); 628 629/* arg_types is a string describing the numbers and types of arguments. 630s means a string, i means an integer, f is a list of fields, F is 631a single field, 632? means that the previous argument is optional, * means that the 633previous argument can occur any number of times. */ 634 635struct S { 636 const char *name; 637 command_t func; 638 const char *arg_types; 639} command_table[] = { 640 { "include", include_command, "s" }, 641 { "echo", echo_command, "s*" }, 642 { "capitalize", capitalize_command, "f?" }, 643 { "accumulate", accumulate_command, "" }, 644 { "no-accumulate", no_accumulate_command, "" }, 645 { "move-punctuation", move_punctuation_command, "" }, 646 { "no-move-punctuation", no_move_punctuation_command, "" }, 647 { "sort", sort_command, "s?" }, 648 { "no-sort", no_sort_command, "" }, 649 { "articles", articles_command, "s*" }, 650 { "database", database_command, "ss*" }, 651 { "default-database", default_database_command, "" }, 652 { "no-default-database", no_default_database_command, "" }, 653 { "bibliography", bibliography_command, "ss*" }, 654 { "annotate", annotate_command, "F?s?" }, 655 { "no-annotate", no_annotate_command, "" }, 656 { "reverse", reverse_command, "s" }, 657 { "no-reverse", no_reverse_command, "" }, 658 { "abbreviate", abbreviate_command, "ss?s?s?s?" }, 659 { "no-abbreviate", no_abbreviate_command, "" }, 660 { "search-ignore", search_ignore_command, "f?" }, 661 { "no-search-ignore", no_search_ignore_command, "" }, 662 { "search-truncate", search_truncate_command, "i?" }, 663 { "no-search-truncate", no_search_truncate_command, "" }, 664 { "discard", discard_command, "f?" }, 665 { "no-discard", no_discard_command, "" }, 666 { "label", label_command, "s" }, 667 { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" }, 668 { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" }, 669 { "label-in-reference", label_in_reference_command, "" }, 670 { "no-label-in-reference", no_label_in_reference_command, "" }, 671 { "label-in-text", label_in_text_command, "" }, 672 { "no-label-in-text", no_label_in_text_command, "" }, 673 { "sort-adjacent-labels", sort_adjacent_labels_command, "" }, 674 { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" }, 675 { "date-as-label", date_as_label_command, "s?" }, 676 { "no-date-as-label", no_date_as_label_command, "" }, 677 { "short-label", short_label_command, "s" }, 678 { "no-short-label", no_short_label_command, "" }, 679 { "compatible", compatible_command, "" }, 680 { "no-compatible", no_compatible_command, "" }, 681 { "join-authors", join_authors_command, "sss?" }, 682 { "bracket-label", bracket_label_command, "sss" }, 683 { "separate-label-second-parts", separate_label_second_parts_command, "s" }, 684 { "et-al", et_al_command, "sii?" }, 685 { "no-et-al", no_et_al_command, "" }, 686}; 687 688static int check_args(const char *types, const char *name, 689 int argc, argument *argv) 690{ 691 int argno = 0; 692 while (*types) { 693 if (argc == 0) { 694 if (types[1] == '?') 695 break; 696 else if (types[1] == '*') { 697 assert(types[2] == '\0'); 698 break; 699 } 700 else { 701 input_stack::error("missing argument for command `%1'", name); 702 return 0; 703 } 704 } 705 switch (*types) { 706 case 's': 707 break; 708 case 'i': 709 { 710 char *ptr; 711 long n = strtol(argv->s, &ptr, 10); 712 if ((n == 0 && ptr == argv->s) 713 || *ptr != '\0') { 714 input_stack::error("argument %1 for command `%2' must be an integer", 715 argno + 1, name); 716 return 0; 717 } 718 argv->n = (int)n; 719 break; 720 } 721 case 'f': 722 { 723 for (const char *ptr = argv->s; *ptr != '\0'; ptr++) 724 if (!cs_field_name(*ptr)) { 725 input_stack::error("argument %1 for command `%2' must be a list of fields", 726 argno + 1, name); 727 return 0; 728 } 729 break; 730 } 731 case 'F': 732 if (argv->s[0] == '\0' || argv->s[1] != '\0' 733 || !cs_field_name(argv->s[0])) { 734 input_stack::error("argument %1 for command `%2' must be a field name", 735 argno + 1, name); 736 return 0; 737 } 738 break; 739 default: 740 assert(0); 741 } 742 if (types[1] == '?') 743 types += 2; 744 else if (types[1] != '*') 745 types += 1; 746 --argc; 747 ++argv; 748 ++argno; 749 } 750 if (argc > 0) { 751 input_stack::error("too many arguments for command `%1'", name); 752 return 0; 753 } 754 return 1; 755} 756 757static void execute_command(const char *name, int argc, argument *argv) 758{ 759 for (unsigned int i = 0; 760 i < sizeof(command_table)/sizeof(command_table[0]); i++) 761 if (strcmp(name, command_table[i].name) == 0) { 762 if (check_args(command_table[i].arg_types, name, argc, argv)) 763 (*command_table[i].func)(argc, argv); 764 return; 765 } 766 input_stack::error("unknown command `%1'", name); 767} 768 769static void command_loop() 770{ 771 string command; 772 for (;;) { 773 command.clear(); 774 int res = get_word(command); 775 if (res != 1) { 776 if (res == 0) 777 continue; 778 break; 779 } 780 int argc = 0; 781 command += '\0'; 782 while ((res = get_word(command)) == 1) { 783 argc++; 784 command += '\0'; 785 } 786 argument *argv = new argument[argc]; 787 const char *ptr = command.contents(); 788 for (int i = 0; i < argc; i++) 789 argv[i].s = ptr = strchr(ptr, '\0') + 1; 790 execute_command(command.contents(), argc, argv); 791 a_delete argv; 792 if (res == -1) 793 break; 794 } 795} 796 797void process_commands(const char *file) 798{ 799 input_stack::init(); 800 input_stack::push_file(file); 801 command_loop(); 802} 803 804void process_commands(string &s, const char *file, int lineno) 805{ 806 input_stack::init(); 807 input_stack::push_string(s, file, lineno); 808 command_loop(); 809} 810