// refer.cpp revision 114402
// -*- C++ -*-
/* Copyright (C) 1989-1992, 2000, 2001, 2002 Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

#include "refer.h"
#include "refid.h"
#include "ref.h"
#include "token.h"
#include "search.h"
#include "command.h"

// Printed by the -v / --version option in main(); defined outside this file.
extern "C" const char *Version_string;

// Control characters that do_file() embeds in pending_line to record where
// a citation goes.  label_processing_state later replaces them with
// pre_label, post_label / sep_label and the label text respectively.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Flag bits set by make_reference() when the text in front of the first
// `%' field contains `[' or `]'.  do_file() masks them off before treating
// the remaining flag bits as a label_type.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010

// Stream that processed text is written to.  It is stdout except while
// divert_to_temporary_file() has redirected it.
static FILE *outfp = stdout;

// Settings.  main() stores into several of these from the command line
// (-c capitalize_fields, -a reverse_fields, -s sort_fields, -e accumulate,
// -P move_punctuation, -b/-B label_in_text and label_in_reference, -B
// annotation_field and annotation_macro, -S pre_label/post_label/sep_label).
// NOTE(review): the remaining ones are not written in this file; presumably
// they are changed by the command processor -- confirm against command.h.
string capitalize_fields;
string reverse_fields;
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;
int annotation_field = -1;
string annotation_macro;
string discard_fields = "XYZ";
string pre_label = "\\*([.";
string post_label = "\\*(.]";
string sep_label = ", ";
int accumulate = 0;
int move_punctuation = 0;
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;
int label_in_reference = 1;
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
62string join_authors_exactly_two = " and "; 63// When there are more than two authors join the last two with this. 64string join_authors_last_two = ", and "; 65// Otherwise join authors with this. 66string join_authors_default = ", "; 67string separate_label_second_parts = ", "; 68// Use this string to represent that there are other authors. 69string et_al = " et al"; 70// Use et al only if it can replace at least this many authors. 71int et_al_min_elide = 2; 72// Use et al only if the total number of authors is at least this. 73int et_al_min_total = 3; 74 75 76int compatible_flag = 0; 77 78int short_label_flag = 0; 79 80static int recognize_R1_R2 = 1; 81 82search_list database_list; 83int search_default = 1; 84static int default_database_loaded = 0; 85 86static reference **citation = 0; 87static int ncitations = 0; 88static int citation_max = 0; 89 90static reference **reference_hash_table = 0; 91static int hash_table_size; 92static int nreferences = 0; 93 94static int need_syncing = 0; 95string pending_line; 96string pending_lf_lines; 97 98static void output_pending_line(); 99static unsigned immediately_handle_reference(const string &); 100static void immediately_output_references(); 101static unsigned store_reference(const string &); 102static void divert_to_temporary_file(); 103static reference *make_reference(const string &, unsigned *); 104static void usage(FILE *stream); 105static void do_file(const char *); 106static void split_punct(string &line, string &punct); 107static void output_citation_group(reference **v, int n, label_type, FILE *fp); 108static void possibly_load_default_database(); 109 110int main(int argc, char **argv) 111{ 112 program_name = argv[0]; 113 static char stderr_buf[BUFSIZ]; 114 setbuf(stderr, stderr_buf); 115 outfp = stdout; 116 int finished_options = 0; 117 int bib_flag = 0; 118 int done_spec = 0; 119 120 for (--argc, ++argv; 121 !finished_options && argc > 0 && argv[0][0] == '-' 122 && argv[0][1] != '\0'; 123 argv++, argc--) { 124 
const char *opt = argv[0] + 1; 125 while (opt != 0 && *opt != '\0') { 126 switch (*opt) { 127 case 'C': 128 compatible_flag = 1; 129 opt++; 130 break; 131 case 'B': 132 bib_flag = 1; 133 label_in_reference = 0; 134 label_in_text = 0; 135 ++opt; 136 if (*opt == '\0') { 137 annotation_field = 'X'; 138 annotation_macro = "AP"; 139 } 140 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { 141 annotation_field = opt[0]; 142 annotation_macro = opt + 2; 143 } 144 opt = 0; 145 break; 146 case 'P': 147 move_punctuation = 1; 148 opt++; 149 break; 150 case 'R': 151 recognize_R1_R2 = 0; 152 opt++; 153 break; 154 case 'S': 155 // Not a very useful spec. 156 set_label_spec("(A.n|Q)', '(D.y|D)"); 157 done_spec = 1; 158 pre_label = " ("; 159 post_label = ")"; 160 sep_label = "; "; 161 opt++; 162 break; 163 case 'V': 164 verify_flag = 1; 165 opt++; 166 break; 167 case 'f': 168 { 169 const char *num = 0; 170 if (*++opt == '\0') { 171 if (argc > 1) { 172 num = *++argv; 173 --argc; 174 } 175 else { 176 error("option `f' requires an argument"); 177 usage(stderr); 178 exit(1); 179 } 180 } 181 else { 182 num = opt; 183 opt = 0; 184 } 185 const char *ptr; 186 for (ptr = num; *ptr; ptr++) 187 if (!csdigit(*ptr)) { 188 error("bad character `%1' in argument to -f option", *ptr); 189 break; 190 } 191 if (*ptr == '\0') { 192 string spec; 193 spec = '%'; 194 spec += num; 195 spec += '\0'; 196 set_label_spec(spec.contents()); 197 done_spec = 1; 198 } 199 break; 200 } 201 case 'b': 202 label_in_text = 0; 203 label_in_reference = 0; 204 opt++; 205 break; 206 case 'e': 207 accumulate = 1; 208 opt++; 209 break; 210 case 'c': 211 capitalize_fields = ++opt; 212 opt = 0; 213 break; 214 case 'k': 215 { 216 char buf[5]; 217 if (csalpha(*++opt)) 218 buf[0] = *opt++; 219 else { 220 if (*opt != '\0') 221 error("bad field name `%1'", *opt++); 222 buf[0] = 'L'; 223 } 224 buf[1] = '~'; 225 buf[2] = '%'; 226 buf[3] = 'a'; 227 buf[4] = '\0'; 228 set_label_spec(buf); 229 done_spec = 1; 230 } 231 break; 
232 case 'a': 233 { 234 const char *ptr; 235 for (ptr = ++opt; *ptr; ptr++) 236 if (!csdigit(*ptr)) { 237 error("argument to `a' option not a number"); 238 break; 239 } 240 if (*ptr == '\0') { 241 reverse_fields = 'A'; 242 reverse_fields += opt; 243 } 244 opt = 0; 245 } 246 break; 247 case 'i': 248 linear_ignore_fields = ++opt; 249 opt = 0; 250 break; 251 case 'l': 252 { 253 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a 254 strcpy(buf, "A.n"); 255 if (*++opt != '\0' && *opt != ',') { 256 char *ptr; 257 long n = strtol(opt, &ptr, 10); 258 if (n == 0 && ptr == opt) { 259 error("bad integer `%1' in `l' option", opt); 260 opt = 0; 261 break; 262 } 263 if (n < 0) 264 n = 0; 265 opt = ptr; 266 sprintf(strchr(buf, '\0'), "+%ld", n); 267 } 268 strcat(buf, "D.y"); 269 if (*opt == ',') 270 opt++; 271 if (*opt != '\0') { 272 char *ptr; 273 long n = strtol(opt, &ptr, 10); 274 if (n == 0 && ptr == opt) { 275 error("bad integer `%1' in `l' option", opt); 276 opt = 0; 277 break; 278 } 279 if (n < 0) 280 n = 0; 281 sprintf(strchr(buf, '\0'), "-%ld", n); 282 opt = ptr; 283 if (*opt != '\0') 284 error("argument to `l' option not of form `m,n'"); 285 } 286 strcat(buf, "%a"); 287 if (!set_label_spec(buf)) 288 assert(0); 289 done_spec = 1; 290 } 291 break; 292 case 'n': 293 search_default = 0; 294 opt++; 295 break; 296 case 'p': 297 { 298 const char *filename = 0; 299 if (*++opt == '\0') { 300 if (argc > 1) { 301 filename = *++argv; 302 argc--; 303 } 304 else { 305 error("option `p' requires an argument"); 306 usage(stderr); 307 exit(1); 308 } 309 } 310 else { 311 filename = opt; 312 opt = 0; 313 } 314 database_list.add_file(filename); 315 } 316 break; 317 case 's': 318 if (*++opt == '\0') 319 sort_fields = "AD"; 320 else { 321 sort_fields = opt; 322 opt = 0; 323 } 324 accumulate = 1; 325 break; 326 case 't': 327 { 328 char *ptr; 329 long n = strtol(opt, &ptr, 10); 330 if (n == 0 && ptr == opt) { 331 error("bad integer `%1' in `t' option", opt); 332 opt = 0; 333 break; 334 } 335 if (n < 
1) 336 n = 1; 337 linear_truncate_len = int(n); 338 opt = ptr; 339 break; 340 } 341 case '-': 342 if (opt[1] == '\0') { 343 finished_options = 1; 344 opt++; 345 break; 346 } 347 if (strcmp(opt,"-version")==0) { 348 case 'v': 349 printf("GNU refer (groff) version %s\n", Version_string); 350 exit(0); 351 break; 352 } 353 if (strcmp(opt,"-help")==0) { 354 usage(stdout); 355 exit(0); 356 break; 357 } 358 // fall through 359 default: 360 error("unrecognized option `%1'", *opt); 361 usage(stderr); 362 exit(1); 363 break; 364 } 365 } 366 } 367 if (!done_spec) 368 set_label_spec("%1"); 369 if (argc <= 0) { 370 if (bib_flag) 371 do_bib("-"); 372 else 373 do_file("-"); 374 } 375 else { 376 for (int i = 0; i < argc; i++) { 377 if (bib_flag) 378 do_bib(argv[i]); 379 else 380 do_file(argv[i]); 381 } 382 } 383 if (accumulate) 384 output_references(); 385 if (fflush(stdout) < 0) 386 fatal("output error"); 387 return 0; 388} 389 390static void usage(FILE *stream) 391{ 392 fprintf(stream, 393"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" 394" [-sXYZ] [-tN] [-BL.M] [files ...]\n", 395 program_name); 396} 397 398static void possibly_load_default_database() 399{ 400 if (search_default && !default_database_loaded) { 401 char *filename = getenv("REFER"); 402 if (filename) 403 database_list.add_file(filename); 404 else 405 database_list.add_file(DEFAULT_INDEX, 1); 406 default_database_loaded = 1; 407 } 408} 409 410static int is_list(const string &str) 411{ 412 const char *start = str.contents(); 413 const char *end = start + str.length(); 414 while (end > start && csspace(end[-1])) 415 end--; 416 while (start < end && csspace(*start)) 417 start++; 418 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; 419} 420 421static void do_file(const char *filename) 422{ 423 FILE *fp; 424 if (strcmp(filename, "-") == 0) { 425 fp = stdin; 426 } 427 else { 428 errno = 0; 429 fp = fopen(filename, "r"); 430 if (fp == 0) { 431 error("can't open `%1': %2", filename, 
strerror(errno)); 432 return; 433 } 434 } 435 current_filename = filename; 436 fprintf(outfp, ".lf 1 %s\n", filename); 437 string line; 438 current_lineno = 0; 439 for (;;) { 440 line.clear(); 441 for (;;) { 442 int c = getc(fp); 443 if (c == EOF) { 444 if (line.length() > 0) 445 line += '\n'; 446 break; 447 } 448 if (invalid_input_char(c)) 449 error("invalid input character code %1", c); 450 else { 451 line += c; 452 if (c == '\n') 453 break; 454 } 455 } 456 int len = line.length(); 457 if (len == 0) 458 break; 459 current_lineno++; 460 if (len >= 2 && line[0] == '.' && line[1] == '[') { 461 int start_lineno = current_lineno; 462 int start_of_line = 1; 463 string str; 464 string post; 465 string pre(line.contents() + 2, line.length() - 3); 466 for (;;) { 467 int c = getc(fp); 468 if (c == EOF) { 469 error_with_file_and_line(current_filename, start_lineno, 470 "missing `.]' line"); 471 break; 472 } 473 if (start_of_line) 474 current_lineno++; 475 if (start_of_line && c == '.') { 476 int d = getc(fp); 477 if (d == ']') { 478 while ((d = getc(fp)) != '\n' && d != EOF) { 479 if (invalid_input_char(d)) 480 error("invalid input character code %1", d); 481 else 482 post += d; 483 } 484 break; 485 } 486 if (d != EOF) 487 ungetc(d, fp); 488 } 489 if (invalid_input_char(c)) 490 error("invalid input character code %1", c); 491 else 492 str += c; 493 start_of_line = (c == '\n'); 494 } 495 if (is_list(str)) { 496 output_pending_line(); 497 if (accumulate) 498 output_references(); 499 else 500 error("found `$LIST$' but not accumulating references"); 501 } 502 else { 503 unsigned flags = (accumulate 504 ? 
store_reference(str) 505 : immediately_handle_reference(str)); 506 if (label_in_text) { 507 if (accumulate && outfp == stdout) 508 divert_to_temporary_file(); 509 if (pending_line.length() == 0) { 510 warning("can't attach citation to previous line"); 511 } 512 else 513 pending_line.set_length(pending_line.length() - 1); 514 string punct; 515 if (move_punctuation) 516 split_punct(pending_line, punct); 517 int have_text = pre.length() > 0 || post.length() > 0; 518 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET 519 |FORCE_RIGHT_BRACKET)); 520 if ((flags & FORCE_LEFT_BRACKET) || !have_text) 521 pending_line += PRE_LABEL_MARKER; 522 pending_line += pre; 523 char lm = LABEL_MARKER + (int)lt; 524 pending_line += lm; 525 pending_line += post; 526 if ((flags & FORCE_RIGHT_BRACKET) || !have_text) 527 pending_line += POST_LABEL_MARKER; 528 pending_line += punct; 529 pending_line += '\n'; 530 } 531 } 532 need_syncing = 1; 533 } 534 else if (len >= 4 535 && line[0] == '.' && line[1] == 'l' && line[2] == 'f' 536 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 537 pending_lf_lines += line; 538 line += '\0'; 539 if (interpret_lf_args(line.contents() + 3)) 540 current_lineno--; 541 } 542 else if (recognize_R1_R2 543 && len >= 4 544 && line[0] == '.' 
&& line[1] == 'R' && line[2] == '1' 545 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 546 line.clear(); 547 int start_of_line = 1; 548 int start_lineno = current_lineno; 549 for (;;) { 550 int c = getc(fp); 551 if (c != EOF && start_of_line) 552 current_lineno++; 553 if (start_of_line && c == '.') { 554 c = getc(fp); 555 if (c == 'R') { 556 c = getc(fp); 557 if (c == '2') { 558 c = getc(fp); 559 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 560 while (c != EOF && c != '\n') 561 c = getc(fp); 562 break; 563 } 564 else { 565 line += '.'; 566 line += 'R'; 567 line += '2'; 568 } 569 } 570 else { 571 line += '.'; 572 line += 'R'; 573 } 574 } 575 else 576 line += '.'; 577 } 578 if (c == EOF) { 579 error_with_file_and_line(current_filename, start_lineno, 580 "missing `.R2' line"); 581 break; 582 } 583 if (invalid_input_char(c)) 584 error("invalid input character code %1", int(c)); 585 else { 586 line += c; 587 start_of_line = c == '\n'; 588 } 589 } 590 output_pending_line(); 591 if (accumulate) 592 output_references(); 593 else 594 nreferences = 0; 595 process_commands(line, current_filename, start_lineno + 1); 596 need_syncing = 1; 597 } 598 else { 599 output_pending_line(); 600 pending_line = line; 601 } 602 } 603 need_syncing = 0; 604 output_pending_line(); 605 if (fp != stdin) 606 fclose(fp); 607} 608 609class label_processing_state { 610 enum { 611 NORMAL, 612 PENDING_LABEL, 613 PENDING_LABEL_POST, 614 PENDING_LABEL_POST_PRE, 615 PENDING_POST 616 } state; 617 label_type type; // type of pending labels 618 int count; // number of pending labels 619 reference **rptr; // pointer to next reference 620 int rcount; // number of references left 621 FILE *fp; 622 int handle_pending(int c); 623public: 624 label_processing_state(reference **, int, FILE *); 625 ~label_processing_state(); 626 void process(int c); 627}; 628 629static void output_pending_line() 630{ 631 if (label_in_text && !accumulate && ncitations > 0) { 632 label_processing_state 
state(citation, ncitations, outfp); 633 int len = pending_line.length(); 634 for (int i = 0; i < len; i++) 635 state.process((unsigned char)(pending_line[i])); 636 } 637 else 638 put_string(pending_line, outfp); 639 pending_line.clear(); 640 if (pending_lf_lines.length() > 0) { 641 put_string(pending_lf_lines, outfp); 642 pending_lf_lines.clear(); 643 } 644 if (!accumulate) 645 immediately_output_references(); 646 if (need_syncing) { 647 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); 648 need_syncing = 0; 649 } 650} 651 652static void split_punct(string &line, string &punct) 653{ 654 const char *start = line.contents(); 655 const char *end = start + line.length(); 656 const char *ptr = start; 657 const char *last_token_start = 0; 658 for (;;) { 659 if (ptr >= end) 660 break; 661 last_token_start = ptr; 662 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER 663 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) 664 ptr++; 665 else if (!get_token(&ptr, end)) 666 break; 667 } 668 if (last_token_start) { 669 const token_info *ti = lookup_token(last_token_start, end); 670 if (ti->is_punct()) { 671 punct.append(last_token_start, end - last_token_start); 672 line.set_length(last_token_start - start); 673 } 674 } 675} 676 677static void divert_to_temporary_file() 678{ 679 outfp = xtmpfile(); 680} 681 682static void store_citation(reference *ref) 683{ 684 if (ncitations >= citation_max) { 685 if (citation == 0) 686 citation = new reference*[citation_max = 100]; 687 else { 688 reference **old_citation = citation; 689 citation_max *= 2; 690 citation = new reference *[citation_max]; 691 memcpy(citation, old_citation, ncitations*sizeof(reference *)); 692 a_delete old_citation; 693 } 694 } 695 citation[ncitations++] = ref; 696} 697 698static unsigned store_reference(const string &str) 699{ 700 if (reference_hash_table == 0) { 701 reference_hash_table = new reference *[17]; 702 hash_table_size = 17; 703 for (int i = 0; i < hash_table_size; 
i++) 704 reference_hash_table[i] = 0; 705 } 706 unsigned flags; 707 reference *ref = make_reference(str, &flags); 708 ref->compute_hash_code(); 709 unsigned h = ref->hash(); 710 reference **ptr; 711 for (ptr = reference_hash_table + (h % hash_table_size); 712 *ptr != 0; 713 ((ptr == reference_hash_table) 714 ? (ptr = reference_hash_table + hash_table_size - 1) 715 : --ptr)) 716 if (same_reference(**ptr, *ref)) 717 break; 718 if (*ptr != 0) { 719 if (ref->is_merged()) 720 warning("fields ignored because reference already used"); 721 delete ref; 722 ref = *ptr; 723 } 724 else { 725 *ptr = ref; 726 ref->set_number(nreferences); 727 nreferences++; 728 ref->pre_compute_label(); 729 ref->compute_sort_key(); 730 if (nreferences*2 >= hash_table_size) { 731 // Rehash it. 732 reference **old_table = reference_hash_table; 733 int old_size = hash_table_size; 734 hash_table_size = next_size(hash_table_size); 735 reference_hash_table = new reference*[hash_table_size]; 736 int i; 737 for (i = 0; i < hash_table_size; i++) 738 reference_hash_table[i] = 0; 739 for (i = 0; i < old_size; i++) 740 if (old_table[i]) { 741 reference **p; 742 for (p = (reference_hash_table 743 + (old_table[i]->hash() % hash_table_size)); 744 *p; 745 ((p == reference_hash_table) 746 ? 
(p = reference_hash_table + hash_table_size - 1) 747 : --p)) 748 ; 749 *p = old_table[i]; 750 } 751 a_delete old_table; 752 } 753 } 754 if (label_in_text) 755 store_citation(ref); 756 return flags; 757} 758 759unsigned immediately_handle_reference(const string &str) 760{ 761 unsigned flags; 762 reference *ref = make_reference(str, &flags); 763 ref->set_number(nreferences); 764 if (label_in_text || label_in_reference) { 765 ref->pre_compute_label(); 766 ref->immediate_compute_label(); 767 } 768 nreferences++; 769 store_citation(ref); 770 return flags; 771} 772 773static void immediately_output_references() 774{ 775 for (int i = 0; i < ncitations; i++) { 776 reference *ref = citation[i]; 777 if (label_in_reference) { 778 fputs(".ds [F ", outfp); 779 const string &label = ref->get_label(NORMAL_LABEL); 780 if (label.length() > 0 781 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 782 putc('"', outfp); 783 put_string(label, outfp); 784 putc('\n', outfp); 785 } 786 ref->output(outfp); 787 delete ref; 788 } 789 ncitations = 0; 790} 791 792static void output_citation_group(reference **v, int n, label_type type, 793 FILE *fp) 794{ 795 if (sort_adjacent_labels) { 796 // Do an insertion sort. Usually n will be very small. 797 for (int i = 1; i < n; i++) { 798 int num = v[i]->get_number(); 799 reference *temp = v[i]; 800 int j; 801 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) 802 v[j + 1] = v[j]; 803 v[j + 1] = temp; 804 } 805 } 806 // This messes up if !accumulate. 
807 if (accumulate && n > 1) { 808 // remove duplicates 809 int j = 1; 810 for (int i = 1; i < n; i++) 811 if (v[i]->get_label(type) != v[i - 1]->get_label(type)) 812 v[j++] = v[i]; 813 n = j; 814 } 815 string merged_label; 816 for (int i = 0; i < n; i++) { 817 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); 818 if (nmerged > 0) { 819 put_string(merged_label, fp); 820 i += nmerged; 821 } 822 else 823 put_string(v[i]->get_label(type), fp); 824 if (i < n - 1) 825 put_string(sep_label, fp); 826 } 827} 828 829 830label_processing_state::label_processing_state(reference **p, int n, FILE *f) 831: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) 832{ 833} 834 835label_processing_state::~label_processing_state() 836{ 837 int handled = handle_pending(EOF); 838 assert(!handled); 839 assert(rcount == 0); 840} 841 842int label_processing_state::handle_pending(int c) 843{ 844 switch (state) { 845 case NORMAL: 846 break; 847 case PENDING_LABEL: 848 if (c == POST_LABEL_MARKER) { 849 state = PENDING_LABEL_POST; 850 return 1; 851 } 852 else { 853 output_citation_group(rptr, count, type, fp); 854 rptr += count ; 855 rcount -= count; 856 state = NORMAL; 857 } 858 break; 859 case PENDING_LABEL_POST: 860 if (c == PRE_LABEL_MARKER) { 861 state = PENDING_LABEL_POST_PRE; 862 return 1; 863 } 864 else { 865 output_citation_group(rptr, count, type, fp); 866 rptr += count; 867 rcount -= count; 868 put_string(post_label, fp); 869 state = NORMAL; 870 } 871 break; 872 case PENDING_LABEL_POST_PRE: 873 if (c >= LABEL_MARKER 874 && c < LABEL_MARKER + N_LABEL_TYPES 875 && c - LABEL_MARKER == type) { 876 count += 1; 877 state = PENDING_LABEL; 878 return 1; 879 } 880 else { 881 output_citation_group(rptr, count, type, fp); 882 rptr += count; 883 rcount -= count; 884 put_string(sep_label, fp); 885 state = NORMAL; 886 } 887 break; 888 case PENDING_POST: 889 if (c == PRE_LABEL_MARKER) { 890 put_string(sep_label, fp); 891 state = NORMAL; 892 return 1; 893 } 894 else { 895 
put_string(post_label, fp); 896 state = NORMAL; 897 } 898 break; 899 } 900 return 0; 901} 902 903void label_processing_state::process(int c) 904{ 905 if (handle_pending(c)) 906 return; 907 assert(state == NORMAL); 908 switch (c) { 909 case PRE_LABEL_MARKER: 910 put_string(pre_label, fp); 911 state = NORMAL; 912 break; 913 case POST_LABEL_MARKER: 914 state = PENDING_POST; 915 break; 916 case LABEL_MARKER: 917 case LABEL_MARKER + 1: 918 count = 1; 919 state = PENDING_LABEL; 920 type = label_type(c - LABEL_MARKER); 921 break; 922 default: 923 state = NORMAL; 924 putc(c, fp); 925 break; 926 } 927} 928 929extern "C" { 930 931int rcompare(const void *p1, const void *p2) 932{ 933 return compare_reference(**(reference **)p1, **(reference **)p2); 934} 935 936} 937 938void output_references() 939{ 940 assert(accumulate); 941 if (nreferences > 0) { 942 int j = 0; 943 int i; 944 for (i = 0; i < hash_table_size; i++) 945 if (reference_hash_table[i] != 0) 946 reference_hash_table[j++] = reference_hash_table[i]; 947 assert(j == nreferences); 948 for (; j < hash_table_size; j++) 949 reference_hash_table[j] = 0; 950 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); 951 for (i = 0; i < nreferences; i++) 952 reference_hash_table[i]->set_number(i); 953 compute_labels(reference_hash_table, nreferences); 954 } 955 if (outfp != stdout) { 956 rewind(outfp); 957 { 958 label_processing_state state(citation, ncitations, stdout); 959 int c; 960 while ((c = getc(outfp)) != EOF) 961 state.process(c); 962 } 963 ncitations = 0; 964 fclose(outfp); 965 outfp = stdout; 966 } 967 if (nreferences > 0) { 968 fputs(".]<\n", outfp); 969 for (int i = 0; i < nreferences; i++) { 970 if (sort_fields.length() > 0) 971 reference_hash_table[i]->print_sort_key_comment(outfp); 972 if (label_in_reference) { 973 fputs(".ds [F ", outfp); 974 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); 975 if (label.length() > 0 976 && (label[0] == ' ' || label[0] == '\\' || label[0] 
== '"')) 977 putc('"', outfp); 978 put_string(label, outfp); 979 putc('\n', outfp); 980 } 981 reference_hash_table[i]->output(outfp); 982 delete reference_hash_table[i]; 983 reference_hash_table[i] = 0; 984 } 985 fputs(".]>\n", outfp); 986 nreferences = 0; 987 } 988 clear_labels(); 989} 990 991static reference *find_reference(const char *query, int query_len) 992{ 993 // This is so that error messages look better. 994 while (query_len > 0 && csspace(query[query_len - 1])) 995 query_len--; 996 string str; 997 for (int i = 0; i < query_len; i++) 998 str += query[i] == '\n' ? ' ' : query[i]; 999 str += '\0'; 1000 possibly_load_default_database(); 1001 search_list_iterator iter(&database_list, str.contents()); 1002 reference_id rid; 1003 const char *start; 1004 int len; 1005 if (!iter.next(&start, &len, &rid)) { 1006 error("no matches for `%1'", str.contents()); 1007 return 0; 1008 } 1009 const char *end = start + len; 1010 while (start < end) { 1011 if (*start == '%') 1012 break; 1013 while (start < end && *start++ != '\n') 1014 ; 1015 } 1016 if (start >= end) { 1017 error("found a reference for `%1' but it didn't contain any fields", 1018 str.contents()); 1019 return 0; 1020 } 1021 reference *result = new reference(start, end - start, &rid); 1022 if (iter.next(&start, &len, &rid)) 1023 warning("multiple matches for `%1'", str.contents()); 1024 return result; 1025} 1026 1027static reference *make_reference(const string &str, unsigned *flagsp) 1028{ 1029 const char *start = str.contents(); 1030 const char *end = start + str.length(); 1031 const char *ptr = start; 1032 while (ptr < end) { 1033 if (*ptr == '%') 1034 break; 1035 while (ptr < end && *ptr++ != '\n') 1036 ; 1037 } 1038 *flagsp = 0; 1039 for (; start < ptr; start++) { 1040 if (*start == '#') 1041 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET 1042 | FORCE_LEFT_BRACKET))); 1043 else if (*start == '[') 1044 *flagsp |= FORCE_LEFT_BRACKET; 1045 else if (*start == ']') 1046 *flagsp |= 
FORCE_RIGHT_BRACKET; 1047 else if (!csspace(*start)) 1048 break; 1049 } 1050 if (start >= end) { 1051 error("empty reference"); 1052 return new reference; 1053 } 1054 reference *database_ref = 0; 1055 if (start < ptr) 1056 database_ref = find_reference(start, ptr - start); 1057 reference *inline_ref = 0; 1058 if (ptr < end) 1059 inline_ref = new reference(ptr, end - ptr); 1060 if (inline_ref) { 1061 if (database_ref) { 1062 database_ref->merge(*inline_ref); 1063 delete inline_ref; 1064 return database_ref; 1065 } 1066 else 1067 return inline_ref; 1068 } 1069 else if (database_ref) 1070 return database_ref; 1071 else 1072 return new reference; 1073} 1074 1075static void do_ref(const string &str) 1076{ 1077 if (accumulate) 1078 (void)store_reference(str); 1079 else { 1080 (void)immediately_handle_reference(str); 1081 immediately_output_references(); 1082 } 1083} 1084 1085static void trim_blanks(string &str) 1086{ 1087 const char *start = str.contents(); 1088 const char *end = start + str.length(); 1089 while (end > start && end[-1] != '\n' && csspace(end[-1])) 1090 --end; 1091 str.set_length(end - start); 1092} 1093 1094void do_bib(const char *filename) 1095{ 1096 FILE *fp; 1097 if (strcmp(filename, "-") == 0) 1098 fp = stdin; 1099 else { 1100 errno = 0; 1101 fp = fopen(filename, "r"); 1102 if (fp == 0) { 1103 error("can't open `%1': %2", filename, strerror(errno)); 1104 return; 1105 } 1106 current_filename = filename; 1107 } 1108 enum { 1109 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT 1110 } state = START; 1111 string body; 1112 for (;;) { 1113 int c = getc(fp); 1114 if (c == EOF) 1115 break; 1116 if (invalid_input_char(c)) { 1117 error("invalid input character code %1", c); 1118 continue; 1119 } 1120 switch (state) { 1121 case START: 1122 if (c == '%') { 1123 body = c; 1124 state = BODY; 1125 } 1126 else if (c != '\n') 1127 state = MIDDLE; 1128 break; 1129 case MIDDLE: 1130 if (c == '\n') 1131 state = START; 1132 break; 1133 case BODY: 1134 body += c; 
1135 if (c == '\n') 1136 state = BODY_START; 1137 break; 1138 case BODY_START: 1139 if (c == '\n') { 1140 do_ref(body); 1141 state = START; 1142 } 1143 else if (c == '.') 1144 state = BODY_DOT; 1145 else if (csspace(c)) { 1146 state = BODY_BLANK; 1147 body += c; 1148 } 1149 else { 1150 body += c; 1151 state = BODY; 1152 } 1153 break; 1154 case BODY_BLANK: 1155 if (c == '\n') { 1156 trim_blanks(body); 1157 do_ref(body); 1158 state = START; 1159 } 1160 else if (csspace(c)) 1161 body += c; 1162 else { 1163 body += c; 1164 state = BODY; 1165 } 1166 break; 1167 case BODY_DOT: 1168 if (c == ']') { 1169 do_ref(body); 1170 state = MIDDLE; 1171 } 1172 else { 1173 body += '.'; 1174 body += c; 1175 state = c == '\n' ? BODY_START : BODY; 1176 } 1177 break; 1178 default: 1179 assert(0); 1180 } 1181 if (c == '\n') 1182 current_lineno++; 1183 } 1184 switch (state) { 1185 case START: 1186 case MIDDLE: 1187 break; 1188 case BODY: 1189 body += '\n'; 1190 do_ref(body); 1191 break; 1192 case BODY_DOT: 1193 case BODY_START: 1194 do_ref(body); 1195 break; 1196 case BODY_BLANK: 1197 trim_blanks(body); 1198 do_ref(body); 1199 break; 1200 } 1201 fclose(fp); 1202} 1203 1204// from the Dragon Book 1205 1206unsigned hash_string(const char *s, int len) 1207{ 1208 const char *end = s + len; 1209 unsigned h = 0, g; 1210 while (s < end) { 1211 h <<= 4; 1212 h += *s++; 1213 if ((g = h & 0xf0000000) != 0) { 1214 h ^= g >> 24; 1215 h ^= g; 1216 } 1217 } 1218 return h; 1219} 1220 1221int next_size(int n) 1222{ 1223 static const int table_sizes[] = { 1224 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, 1225 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, 1226 16000057, 32000011, 64000031, 128000003, 0 1227 }; 1228 1229 const int *p; 1230 for (p = table_sizes; *p <= n && *p != 0; p++) 1231 ; 1232 assert(*p != 0); 1233 return *p; 1234} 1235 1236