1// -*- C++ -*- 2/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004 3 Free Software Foundation, Inc. 4 Written by James Clark (jjc@jclark.com) 5 6This file is part of groff. 7 8groff is free software; you can redistribute it and/or modify it under 9the terms of the GNU General Public License as published by the Free 10Software Foundation; either version 2, or (at your option) any later 11version. 12 13groff is distributed in the hope that it will be useful, but WITHOUT ANY 14WARRANTY; without even the implied warranty of MERCHANTABILITY or 15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16for more details. 17 18You should have received a copy of the GNU General Public License along 19with groff; see the file COPYING. If not, write to the Free Software 20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22#include "refer.h" 23#include "refid.h" 24#include "ref.h" 25#include "token.h" 26#include "search.h" 27#include "command.h" 28 29extern "C" const char *Version_string; 30 31const char PRE_LABEL_MARKER = '\013'; 32const char POST_LABEL_MARKER = '\014'; 33const char LABEL_MARKER = '\015'; // label_type is added on 34 35#define FORCE_LEFT_BRACKET 04 36#define FORCE_RIGHT_BRACKET 010 37 38static FILE *outfp = stdout; 39 40string capitalize_fields; 41string reverse_fields; 42string abbreviate_fields; 43string period_before_last_name = ". "; 44string period_before_initial = "."; 45string period_before_hyphen = ""; 46string period_before_other = ". "; 47string sort_fields; 48int annotation_field = -1; 49string annotation_macro; 50string discard_fields = "XYZ"; 51string pre_label = "\\*([."; 52string post_label = "\\*(.]"; 53string sep_label = ", "; 54int accumulate = 0; 55int move_punctuation = 0; 56int abbreviate_label_ranges = 0; 57string label_range_indicator; 58int label_in_text = 1; 59int label_in_reference = 1; 60int date_as_label = 0; 61int sort_adjacent_labels = 0; 62// Join exactly two authors with this. 63string join_authors_exactly_two = " and "; 64// When there are more than two authors join the last two with this. 65string join_authors_last_two = ", and "; 66// Otherwise join authors with this. 67string join_authors_default = ", "; 68string separate_label_second_parts = ", "; 69// Use this string to represent that there are other authors. 70string et_al = " et al"; 71// Use et al only if it can replace at least this many authors. 72int et_al_min_elide = 2; 73// Use et al only if the total number of authors is at least this. 74int et_al_min_total = 3; 75 76 77int compatible_flag = 0; 78 79int short_label_flag = 0; 80 81static int recognize_R1_R2 = 1; 82 83search_list database_list; 84int search_default = 1; 85static int default_database_loaded = 0; 86 87static reference **citation = 0; 88static int ncitations = 0; 89static int citation_max = 0; 90 91static reference **reference_hash_table = 0; 92static int hash_table_size; 93static int nreferences = 0; 94 95static int need_syncing = 0; 96string pending_line; 97string pending_lf_lines; 98 99static void output_pending_line(); 100static unsigned immediately_handle_reference(const string &); 101static void immediately_output_references(); 102static unsigned store_reference(const string &); 103static void divert_to_temporary_file(); 104static reference *make_reference(const string &, unsigned *); 105static void usage(FILE *stream); 106static void do_file(const char *); 107static void split_punct(string &line, string &punct); 108static void output_citation_group(reference **v, int n, label_type, FILE *fp); 109static void possibly_load_default_database(); 110 111int main(int argc, char **argv) 112{ 113 program_name = argv[0]; 114 static char stderr_buf[BUFSIZ]; 115 setbuf(stderr, stderr_buf); 116 outfp = stdout; 117 int finished_options = 0; 118 int bib_flag = 0; 119 int done_spec = 0; 120 121 for (--argc, ++argv; 122 !finished_options && argc > 0 && argv[0][0] == '-' 123 && argv[0][1] != '\0'; 124 argv++, argc--) { 125 const char *opt = argv[0] + 1; 126 while (opt != 0 && *opt != '\0') { 127 switch (*opt) { 128 case 'C': 129 compatible_flag = 1; 130 opt++; 131 break; 132 case 'B': 133 bib_flag = 1; 134 label_in_reference = 0; 135 label_in_text = 0; 136 ++opt; 137 if (*opt == '\0') { 138 annotation_field = 'X'; 139 annotation_macro = "AP"; 140 } 141 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { 142 annotation_field = opt[0]; 143 annotation_macro = opt + 2; 144 } 145 opt = 0; 146 break; 147 case 'P': 148 move_punctuation = 1; 149 opt++; 150 break; 151 case 'R': 152 recognize_R1_R2 = 0; 153 opt++; 154 break; 155 case 'S': 156 // Not a very useful spec. 157 set_label_spec("(A.n|Q)', '(D.y|D)"); 158 done_spec = 1; 159 pre_label = " ("; 160 post_label = ")"; 161 sep_label = "; "; 162 opt++; 163 break; 164 case 'V': 165 verify_flag = 1; 166 opt++; 167 break; 168 case 'f': 169 { 170 const char *num = 0; 171 if (*++opt == '\0') { 172 if (argc > 1) { 173 num = *++argv; 174 --argc; 175 } 176 else { 177 error("option `f' requires an argument"); 178 usage(stderr); 179 exit(1); 180 } 181 } 182 else { 183 num = opt; 184 opt = 0; 185 } 186 const char *ptr; 187 for (ptr = num; *ptr; ptr++) 188 if (!csdigit(*ptr)) { 189 error("bad character `%1' in argument to -f option", *ptr); 190 break; 191 } 192 if (*ptr == '\0') { 193 string spec; 194 spec = '%'; 195 spec += num; 196 spec += '\0'; 197 set_label_spec(spec.contents()); 198 done_spec = 1; 199 } 200 break; 201 } 202 case 'b': 203 label_in_text = 0; 204 label_in_reference = 0; 205 opt++; 206 break; 207 case 'e': 208 accumulate = 1; 209 opt++; 210 break; 211 case 'c': 212 capitalize_fields = ++opt; 213 opt = 0; 214 break; 215 case 'k': 216 { 217 char buf[5]; 218 if (csalpha(*++opt)) 219 buf[0] = *opt++; 220 else { 221 if (*opt != '\0') 222 error("bad field name `%1'", *opt++); 223 buf[0] = 'L'; 224 } 225 buf[1] = '~'; 226 buf[2] = '%'; 227 buf[3] = 'a'; 228 buf[4] = '\0'; 229 set_label_spec(buf); 230 done_spec = 1; 231 } 232 break; 233 case 'a': 234 { 235 const char *ptr; 236 for (ptr = ++opt; *ptr; ptr++) 237 if (!csdigit(*ptr)) { 238 error("argument to `a' option not a number"); 239 break; 240 } 241 if (*ptr == '\0') { 242 reverse_fields = 'A'; 243 reverse_fields += opt; 244 } 245 opt = 0; 246 } 247 break; 248 case 'i': 249 linear_ignore_fields = ++opt; 250 opt = 0; 251 break; 252 case 'l': 253 { 254 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a 255 strcpy(buf, "A.n"); 256 if (*++opt != '\0' && *opt != ',') { 257 char *ptr; 258 long n = strtol(opt, &ptr, 10); 259 if (n == 0 && ptr == opt) { 260 error("bad integer `%1' in `l' option", opt); 261 opt = 0; 262 break; 263 } 264 if (n < 0) 265 n = 0; 266 opt = ptr; 267 sprintf(strchr(buf, '\0'), "+%ld", n); 268 } 269 strcat(buf, "D.y"); 270 if (*opt == ',') 271 opt++; 272 if (*opt != '\0') { 273 char *ptr; 274 long n = strtol(opt, &ptr, 10); 275 if (n == 0 && ptr == opt) { 276 error("bad integer `%1' in `l' option", opt); 277 opt = 0; 278 break; 279 } 280 if (n < 0) 281 n = 0; 282 sprintf(strchr(buf, '\0'), "-%ld", n); 283 opt = ptr; 284 if (*opt != '\0') 285 error("argument to `l' option not of form `m,n'"); 286 } 287 strcat(buf, "%a"); 288 if (!set_label_spec(buf)) 289 assert(0); 290 done_spec = 1; 291 } 292 break; 293 case 'n': 294 search_default = 0; 295 opt++; 296 break; 297 case 'p': 298 { 299 const char *filename = 0; 300 if (*++opt == '\0') { 301 if (argc > 1) { 302 filename = *++argv; 303 argc--; 304 } 305 else { 306 error("option `p' requires an argument"); 307 usage(stderr); 308 exit(1); 309 } 310 } 311 else { 312 filename = opt; 313 opt = 0; 314 } 315 database_list.add_file(filename); 316 } 317 break; 318 case 's': 319 if (*++opt == '\0') 320 sort_fields = "AD"; 321 else { 322 sort_fields = opt; 323 opt = 0; 324 } 325 accumulate = 1; 326 break; 327 case 't': 328 { 329 char *ptr; 330 long n = strtol(opt, &ptr, 10); 331 if (n == 0 && ptr == opt) { 332 error("bad integer `%1' in `t' option", opt); 333 opt = 0; 334 break; 335 } 336 if (n < 1) 337 n = 1; 338 linear_truncate_len = int(n); 339 opt = ptr; 340 break; 341 } 342 case '-': 343 if (opt[1] == '\0') { 344 finished_options = 1; 345 opt++; 346 break; 347 } 348 if (strcmp(opt,"-version")==0) { 349 case 'v': 350 printf("GNU refer (groff) version %s\n", Version_string); 351 exit(0); 352 break; 353 } 354 if (strcmp(opt,"-help")==0) { 355 usage(stdout); 356 exit(0); 357 break; 358 } 359 // fall through 360 default: 361 error("unrecognized option `%1'", *opt); 362 usage(stderr); 363 exit(1); 364 break; 365 } 366 } 367 } 368 if (!done_spec) 369 set_label_spec("%1"); 370 if (argc <= 0) { 371 if (bib_flag) 372 do_bib("-"); 373 else 374 do_file("-"); 375 } 376 else { 377 for (int i = 0; i < argc; i++) { 378 if (bib_flag) 379 do_bib(argv[i]); 380 else 381 do_file(argv[i]); 382 } 383 } 384 if (accumulate) 385 output_references(); 386 if (fflush(stdout) < 0) 387 fatal("output error"); 388 return 0; 389} 390 391static void usage(FILE *stream) 392{ 393 fprintf(stream, 394"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" 395" [-sXYZ] [-tN] [-BL.M] [files ...]\n", 396 program_name); 397} 398 399static void possibly_load_default_database() 400{ 401 if (search_default && !default_database_loaded) { 402 char *filename = getenv("REFER"); 403 if (filename) 404 database_list.add_file(filename); 405 else 406 database_list.add_file(DEFAULT_INDEX, 1); 407 default_database_loaded = 1; 408 } 409} 410 411static int is_list(const string &str) 412{ 413 const char *start = str.contents(); 414 const char *end = start + str.length(); 415 while (end > start && csspace(end[-1])) 416 end--; 417 while (start < end && csspace(*start)) 418 start++; 419 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; 420} 421 422static void do_file(const char *filename) 423{ 424 FILE *fp; 425 if (strcmp(filename, "-") == 0) { 426 fp = stdin; 427 } 428 else { 429 errno = 0; 430 fp = fopen(filename, "r"); 431 if (fp == 0) { 432 error("can't open `%1': %2", filename, strerror(errno)); 433 return; 434 } 435 } 436 current_filename = filename; 437 fprintf(outfp, ".lf 1 %s\n", filename); 438 string line; 439 current_lineno = 0; 440 for (;;) { 441 line.clear(); 442 for (;;) { 443 int c = getc(fp); 444 if (c == EOF) { 445 if (line.length() > 0) 446 line += '\n'; 447 break; 448 } 449 if (invalid_input_char(c)) 450 error("invalid input character code %1", c); 451 else { 452 line += c; 453 if (c == '\n') 454 break; 455 } 456 } 457 int len = line.length(); 458 if (len == 0) 459 break; 460 current_lineno++; 461 if (len >= 2 && line[0] == '.' && line[1] == '[') { 462 int start_lineno = current_lineno; 463 int start_of_line = 1; 464 string str; 465 string post; 466 string pre(line.contents() + 2, line.length() - 3); 467 for (;;) { 468 int c = getc(fp); 469 if (c == EOF) { 470 error_with_file_and_line(current_filename, start_lineno, 471 "missing `.]' line"); 472 break; 473 } 474 if (start_of_line) 475 current_lineno++; 476 if (start_of_line && c == '.') { 477 int d = getc(fp); 478 if (d == ']') { 479 while ((d = getc(fp)) != '\n' && d != EOF) { 480 if (invalid_input_char(d)) 481 error("invalid input character code %1", d); 482 else 483 post += d; 484 } 485 break; 486 } 487 if (d != EOF) 488 ungetc(d, fp); 489 } 490 if (invalid_input_char(c)) 491 error("invalid input character code %1", c); 492 else 493 str += c; 494 start_of_line = (c == '\n'); 495 } 496 if (is_list(str)) { 497 output_pending_line(); 498 if (accumulate) 499 output_references(); 500 else 501 error("found `$LIST$' but not accumulating references"); 502 } 503 else { 504 unsigned flags = (accumulate 505 ? store_reference(str) 506 : immediately_handle_reference(str)); 507 if (label_in_text) { 508 if (accumulate && outfp == stdout) 509 divert_to_temporary_file(); 510 if (pending_line.length() == 0) { 511 warning("can't attach citation to previous line"); 512 } 513 else 514 pending_line.set_length(pending_line.length() - 1); 515 string punct; 516 if (move_punctuation) 517 split_punct(pending_line, punct); 518 int have_text = pre.length() > 0 || post.length() > 0; 519 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET 520 |FORCE_RIGHT_BRACKET)); 521 if ((flags & FORCE_LEFT_BRACKET) || !have_text) 522 pending_line += PRE_LABEL_MARKER; 523 pending_line += pre; 524 char lm = LABEL_MARKER + (int)lt; 525 pending_line += lm; 526 pending_line += post; 527 if ((flags & FORCE_RIGHT_BRACKET) || !have_text) 528 pending_line += POST_LABEL_MARKER; 529 pending_line += punct; 530 pending_line += '\n'; 531 } 532 } 533 need_syncing = 1; 534 } 535 else if (len >= 4 536 && line[0] == '.' && line[1] == 'l' && line[2] == 'f' 537 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 538 pending_lf_lines += line; 539 line += '\0'; 540 if (interpret_lf_args(line.contents() + 3)) 541 current_lineno--; 542 } 543 else if (recognize_R1_R2 544 && len >= 4 545 && line[0] == '.' && line[1] == 'R' && line[2] == '1' 546 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 547 line.clear(); 548 int start_of_line = 1; 549 int start_lineno = current_lineno; 550 for (;;) { 551 int c = getc(fp); 552 if (c != EOF && start_of_line) 553 current_lineno++; 554 if (start_of_line && c == '.') { 555 c = getc(fp); 556 if (c == 'R') { 557 c = getc(fp); 558 if (c == '2') { 559 c = getc(fp); 560 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 561 while (c != EOF && c != '\n') 562 c = getc(fp); 563 break; 564 } 565 else { 566 line += '.'; 567 line += 'R'; 568 line += '2'; 569 } 570 } 571 else { 572 line += '.'; 573 line += 'R'; 574 } 575 } 576 else 577 line += '.'; 578 } 579 if (c == EOF) { 580 error_with_file_and_line(current_filename, start_lineno, 581 "missing `.R2' line"); 582 break; 583 } 584 if (invalid_input_char(c)) 585 error("invalid input character code %1", int(c)); 586 else { 587 line += c; 588 start_of_line = c == '\n'; 589 } 590 } 591 output_pending_line(); 592 if (accumulate) 593 output_references(); 594 else 595 nreferences = 0; 596 process_commands(line, current_filename, start_lineno + 1); 597 need_syncing = 1; 598 } 599 else { 600 output_pending_line(); 601 pending_line = line; 602 } 603 } 604 need_syncing = 0; 605 output_pending_line(); 606 if (fp != stdin) 607 fclose(fp); 608} 609 610class label_processing_state { 611 enum { 612 NORMAL, 613 PENDING_LABEL, 614 PENDING_LABEL_POST, 615 PENDING_LABEL_POST_PRE, 616 PENDING_POST 617 } state; 618 label_type type; // type of pending labels 619 int count; // number of pending labels 620 reference **rptr; // pointer to next reference 621 int rcount; // number of references left 622 FILE *fp; 623 int handle_pending(int c); 624public: 625 label_processing_state(reference **, int, FILE *); 626 ~label_processing_state(); 627 void process(int c); 628}; 629 630static void output_pending_line() 631{ 632 if (label_in_text && !accumulate && ncitations > 0) { 633 label_processing_state state(citation, ncitations, outfp); 634 int len = pending_line.length(); 635 for (int i = 0; i < len; i++) 636 state.process((unsigned char)(pending_line[i])); 637 } 638 else 639 put_string(pending_line, outfp); 640 pending_line.clear(); 641 if (pending_lf_lines.length() > 0) { 642 put_string(pending_lf_lines, outfp); 643 pending_lf_lines.clear(); 644 } 645 if (!accumulate) 646 immediately_output_references(); 647 if (need_syncing) { 648 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); 649 need_syncing = 0; 650 } 651} 652 653static void split_punct(string &line, string &punct) 654{ 655 const char *start = line.contents(); 656 const char *end = start + line.length(); 657 const char *ptr = start; 658 const char *last_token_start = 0; 659 for (;;) { 660 if (ptr >= end) 661 break; 662 last_token_start = ptr; 663 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER 664 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) 665 ptr++; 666 else if (!get_token(&ptr, end)) 667 break; 668 } 669 if (last_token_start) { 670 const token_info *ti = lookup_token(last_token_start, end); 671 if (ti->is_punct()) { 672 punct.append(last_token_start, end - last_token_start); 673 line.set_length(last_token_start - start); 674 } 675 } 676} 677 678static void divert_to_temporary_file() 679{ 680 outfp = xtmpfile(); 681} 682 683static void store_citation(reference *ref) 684{ 685 if (ncitations >= citation_max) { 686 if (citation == 0) 687 citation = new reference*[citation_max = 100]; 688 else { 689 reference **old_citation = citation; 690 citation_max *= 2; 691 citation = new reference *[citation_max]; 692 memcpy(citation, old_citation, ncitations*sizeof(reference *)); 693 a_delete old_citation; 694 } 695 } 696 citation[ncitations++] = ref; 697} 698 699static unsigned store_reference(const string &str) 700{ 701 if (reference_hash_table == 0) { 702 reference_hash_table = new reference *[17]; 703 hash_table_size = 17; 704 for (int i = 0; i < hash_table_size; i++) 705 reference_hash_table[i] = 0; 706 } 707 unsigned flags; 708 reference *ref = make_reference(str, &flags); 709 ref->compute_hash_code(); 710 unsigned h = ref->hash(); 711 reference **ptr; 712 for (ptr = reference_hash_table + (h % hash_table_size); 713 *ptr != 0; 714 ((ptr == reference_hash_table) 715 ? (ptr = reference_hash_table + hash_table_size - 1) 716 : --ptr)) 717 if (same_reference(**ptr, *ref)) 718 break; 719 if (*ptr != 0) { 720 if (ref->is_merged()) 721 warning("fields ignored because reference already used"); 722 delete ref; 723 ref = *ptr; 724 } 725 else { 726 *ptr = ref; 727 ref->set_number(nreferences); 728 nreferences++; 729 ref->pre_compute_label(); 730 ref->compute_sort_key(); 731 if (nreferences*2 >= hash_table_size) { 732 // Rehash it. 733 reference **old_table = reference_hash_table; 734 int old_size = hash_table_size; 735 hash_table_size = next_size(hash_table_size); 736 reference_hash_table = new reference*[hash_table_size]; 737 int i; 738 for (i = 0; i < hash_table_size; i++) 739 reference_hash_table[i] = 0; 740 for (i = 0; i < old_size; i++) 741 if (old_table[i]) { 742 reference **p; 743 for (p = (reference_hash_table 744 + (old_table[i]->hash() % hash_table_size)); 745 *p; 746 ((p == reference_hash_table) 747 ? (p = reference_hash_table + hash_table_size - 1) 748 : --p)) 749 ; 750 *p = old_table[i]; 751 } 752 a_delete old_table; 753 } 754 } 755 if (label_in_text) 756 store_citation(ref); 757 return flags; 758} 759 760unsigned immediately_handle_reference(const string &str) 761{ 762 unsigned flags; 763 reference *ref = make_reference(str, &flags); 764 ref->set_number(nreferences); 765 if (label_in_text || label_in_reference) { 766 ref->pre_compute_label(); 767 ref->immediate_compute_label(); 768 } 769 nreferences++; 770 store_citation(ref); 771 return flags; 772} 773 774static void immediately_output_references() 775{ 776 for (int i = 0; i < ncitations; i++) { 777 reference *ref = citation[i]; 778 if (label_in_reference) { 779 fputs(".ds [F ", outfp); 780 const string &label = ref->get_label(NORMAL_LABEL); 781 if (label.length() > 0 782 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 783 putc('"', outfp); 784 put_string(label, outfp); 785 putc('\n', outfp); 786 } 787 ref->output(outfp); 788 delete ref; 789 } 790 ncitations = 0; 791} 792 793static void output_citation_group(reference **v, int n, label_type type, 794 FILE *fp) 795{ 796 if (sort_adjacent_labels) { 797 // Do an insertion sort. Usually n will be very small. 798 for (int i = 1; i < n; i++) { 799 int num = v[i]->get_number(); 800 reference *temp = v[i]; 801 int j; 802 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) 803 v[j + 1] = v[j]; 804 v[j + 1] = temp; 805 } 806 } 807 // This messes up if !accumulate. 808 if (accumulate && n > 1) { 809 // remove duplicates 810 int j = 1; 811 for (int i = 1; i < n; i++) 812 if (v[i]->get_label(type) != v[i - 1]->get_label(type)) 813 v[j++] = v[i]; 814 n = j; 815 } 816 string merged_label; 817 for (int i = 0; i < n; i++) { 818 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); 819 if (nmerged > 0) { 820 put_string(merged_label, fp); 821 i += nmerged; 822 } 823 else 824 put_string(v[i]->get_label(type), fp); 825 if (i < n - 1) 826 put_string(sep_label, fp); 827 } 828} 829 830 831label_processing_state::label_processing_state(reference **p, int n, FILE *f) 832: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) 833{ 834} 835 836label_processing_state::~label_processing_state() 837{ 838 int handled = handle_pending(EOF); 839 assert(!handled); 840 assert(rcount == 0); 841} 842 843int label_processing_state::handle_pending(int c) 844{ 845 switch (state) { 846 case NORMAL: 847 break; 848 case PENDING_LABEL: 849 if (c == POST_LABEL_MARKER) { 850 state = PENDING_LABEL_POST; 851 return 1; 852 } 853 else { 854 output_citation_group(rptr, count, type, fp); 855 rptr += count ; 856 rcount -= count; 857 state = NORMAL; 858 } 859 break; 860 case PENDING_LABEL_POST: 861 if (c == PRE_LABEL_MARKER) { 862 state = PENDING_LABEL_POST_PRE; 863 return 1; 864 } 865 else { 866 output_citation_group(rptr, count, type, fp); 867 rptr += count; 868 rcount -= count; 869 put_string(post_label, fp); 870 state = NORMAL; 871 } 872 break; 873 case PENDING_LABEL_POST_PRE: 874 if (c >= LABEL_MARKER 875 && c < LABEL_MARKER + N_LABEL_TYPES 876 && c - LABEL_MARKER == type) { 877 count += 1; 878 state = PENDING_LABEL; 879 return 1; 880 } 881 else { 882 output_citation_group(rptr, count, type, fp); 883 rptr += count; 884 rcount -= count; 885 put_string(sep_label, fp); 886 state = NORMAL; 887 } 888 break; 889 case PENDING_POST: 890 if (c == PRE_LABEL_MARKER) { 891 put_string(sep_label, fp); 892 state = NORMAL; 893 return 1; 894 } 895 else { 896 put_string(post_label, fp); 897 state = NORMAL; 898 } 899 break; 900 } 901 return 0; 902} 903 904void label_processing_state::process(int c) 905{ 906 if (handle_pending(c)) 907 return; 908 assert(state == NORMAL); 909 switch (c) { 910 case PRE_LABEL_MARKER: 911 put_string(pre_label, fp); 912 state = NORMAL; 913 break; 914 case POST_LABEL_MARKER: 915 state = PENDING_POST; 916 break; 917 case LABEL_MARKER: 918 case LABEL_MARKER + 1: 919 count = 1; 920 state = PENDING_LABEL; 921 type = label_type(c - LABEL_MARKER); 922 break; 923 default: 924 state = NORMAL; 925 putc(c, fp); 926 break; 927 } 928} 929 930extern "C" { 931 932int rcompare(const void *p1, const void *p2) 933{ 934 return compare_reference(**(reference **)p1, **(reference **)p2); 935} 936 937} 938 939void output_references() 940{ 941 assert(accumulate); 942 if (!hash_table_size) { 943 error("nothing to reference (probably `bibliography' before `sort')"); 944 accumulate = 0; 945 nreferences = 0; 946 return; 947 } 948 if (nreferences > 0) { 949 int j = 0; 950 int i; 951 for (i = 0; i < hash_table_size; i++) 952 if (reference_hash_table[i] != 0) 953 reference_hash_table[j++] = reference_hash_table[i]; 954 assert(j == nreferences); 955 for (; j < hash_table_size; j++) 956 reference_hash_table[j] = 0; 957 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); 958 for (i = 0; i < nreferences; i++) 959 reference_hash_table[i]->set_number(i); 960 compute_labels(reference_hash_table, nreferences); 961 } 962 if (outfp != stdout) { 963 rewind(outfp); 964 { 965 label_processing_state state(citation, ncitations, stdout); 966 int c; 967 while ((c = getc(outfp)) != EOF) 968 state.process(c); 969 } 970 ncitations = 0; 971 fclose(outfp); 972 outfp = stdout; 973 } 974 if (nreferences > 0) { 975 fputs(".]<\n", outfp); 976 for (int i = 0; i < nreferences; i++) { 977 if (sort_fields.length() > 0) 978 reference_hash_table[i]->print_sort_key_comment(outfp); 979 if (label_in_reference) { 980 fputs(".ds [F ", outfp); 981 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); 982 if (label.length() > 0 983 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 984 putc('"', outfp); 985 put_string(label, outfp); 986 putc('\n', outfp); 987 } 988 reference_hash_table[i]->output(outfp); 989 delete reference_hash_table[i]; 990 reference_hash_table[i] = 0; 991 } 992 fputs(".]>\n", outfp); 993 nreferences = 0; 994 } 995 clear_labels(); 996} 997 998static reference *find_reference(const char *query, int query_len) 999{ 1000 // This is so that error messages look better. 1001 while (query_len > 0 && csspace(query[query_len - 1])) 1002 query_len--; 1003 string str; 1004 for (int i = 0; i < query_len; i++) 1005 str += query[i] == '\n' ? ' ' : query[i]; 1006 str += '\0'; 1007 possibly_load_default_database(); 1008 search_list_iterator iter(&database_list, str.contents()); 1009 reference_id rid; 1010 const char *start; 1011 int len; 1012 if (!iter.next(&start, &len, &rid)) { 1013 error("no matches for `%1'", str.contents()); 1014 return 0; 1015 } 1016 const char *end = start + len; 1017 while (start < end) { 1018 if (*start == '%') 1019 break; 1020 while (start < end && *start++ != '\n') 1021 ; 1022 } 1023 if (start >= end) { 1024 error("found a reference for `%1' but it didn't contain any fields", 1025 str.contents()); 1026 return 0; 1027 } 1028 reference *result = new reference(start, end - start, &rid); 1029 if (iter.next(&start, &len, &rid)) 1030 warning("multiple matches for `%1'", str.contents()); 1031 return result; 1032} 1033 1034static reference *make_reference(const string &str, unsigned *flagsp) 1035{ 1036 const char *start = str.contents(); 1037 const char *end = start + str.length(); 1038 const char *ptr = start; 1039 while (ptr < end) { 1040 if (*ptr == '%') 1041 break; 1042 while (ptr < end && *ptr++ != '\n') 1043 ; 1044 } 1045 *flagsp = 0; 1046 for (; start < ptr; start++) { 1047 if (*start == '#') 1048 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET 1049 | FORCE_LEFT_BRACKET))); 1050 else if (*start == '[') 1051 *flagsp |= FORCE_LEFT_BRACKET; 1052 else if (*start == ']') 1053 *flagsp |= FORCE_RIGHT_BRACKET; 1054 else if (!csspace(*start)) 1055 break; 1056 } 1057 if (start >= end) { 1058 error("empty reference"); 1059 return new reference; 1060 } 1061 reference *database_ref = 0; 1062 if (start < ptr) 1063 database_ref = find_reference(start, ptr - start); 1064 reference *inline_ref = 0; 1065 if (ptr < end) 1066 inline_ref = new reference(ptr, end - ptr); 1067 if (inline_ref) { 1068 if (database_ref) { 1069 database_ref->merge(*inline_ref); 1070 delete inline_ref; 1071 return database_ref; 1072 } 1073 else 1074 return inline_ref; 1075 } 1076 else if (database_ref) 1077 return database_ref; 1078 else 1079 return new reference; 1080} 1081 1082static void do_ref(const string &str) 1083{ 1084 if (accumulate) 1085 (void)store_reference(str); 1086 else { 1087 (void)immediately_handle_reference(str); 1088 immediately_output_references(); 1089 } 1090} 1091 1092static void trim_blanks(string &str) 1093{ 1094 const char *start = str.contents(); 1095 const char *end = start + str.length(); 1096 while (end > start && end[-1] != '\n' && csspace(end[-1])) 1097 --end; 1098 str.set_length(end - start); 1099} 1100 1101void do_bib(const char *filename) 1102{ 1103 FILE *fp; 1104 if (strcmp(filename, "-") == 0) 1105 fp = stdin; 1106 else { 1107 errno = 0; 1108 fp = fopen(filename, "r"); 1109 if (fp == 0) { 1110 error("can't open `%1': %2", filename, strerror(errno)); 1111 return; 1112 } 1113 current_filename = filename; 1114 } 1115 enum { 1116 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT 1117 } state = START; 1118 string body; 1119 for (;;) { 1120 int c = getc(fp); 1121 if (c == EOF) 1122 break; 1123 if (invalid_input_char(c)) { 1124 error("invalid input character code %1", c); 1125 continue; 1126 } 1127 switch (state) { 1128 case START: 1129 if (c == '%') { 1130 body = c; 1131 state = BODY; 1132 } 1133 else if (c != '\n') 1134 state = MIDDLE; 1135 break; 1136 case MIDDLE: 1137 if (c == '\n') 1138 state = START; 1139 break; 1140 case BODY: 1141 body += c; 1142 if (c == '\n') 1143 state = BODY_START; 1144 break; 1145 case BODY_START: 1146 if (c == '\n') { 1147 do_ref(body); 1148 state = START; 1149 } 1150 else if (c == '.') 1151 state = BODY_DOT; 1152 else if (csspace(c)) { 1153 state = BODY_BLANK; 1154 body += c; 1155 } 1156 else { 1157 body += c; 1158 state = BODY; 1159 } 1160 break; 1161 case BODY_BLANK: 1162 if (c == '\n') { 1163 trim_blanks(body); 1164 do_ref(body); 1165 state = START; 1166 } 1167 else if (csspace(c)) 1168 body += c; 1169 else { 1170 body += c; 1171 state = BODY; 1172 } 1173 break; 1174 case BODY_DOT: 1175 if (c == ']') { 1176 do_ref(body); 1177 state = MIDDLE; 1178 } 1179 else { 1180 body += '.'; 1181 body += c; 1182 state = c == '\n' ? BODY_START : BODY; 1183 } 1184 break; 1185 default: 1186 assert(0); 1187 } 1188 if (c == '\n') 1189 current_lineno++; 1190 } 1191 switch (state) { 1192 case START: 1193 case MIDDLE: 1194 break; 1195 case BODY: 1196 body += '\n'; 1197 do_ref(body); 1198 break; 1199 case BODY_DOT: 1200 case BODY_START: 1201 do_ref(body); 1202 break; 1203 case BODY_BLANK: 1204 trim_blanks(body); 1205 do_ref(body); 1206 break; 1207 } 1208 fclose(fp); 1209} 1210 1211// from the Dragon Book 1212 1213unsigned hash_string(const char *s, int len) 1214{ 1215 const char *end = s + len; 1216 unsigned h = 0, g; 1217 while (s < end) { 1218 h <<= 4; 1219 h += *s++; 1220 if ((g = h & 0xf0000000) != 0) { 1221 h ^= g >> 24; 1222 h ^= g; 1223 } 1224 } 1225 return h; 1226} 1227 1228int next_size(int n) 1229{ 1230 static const int table_sizes[] = { 1231 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, 1232 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, 1233 16000057, 32000011, 64000031, 128000003, 0 1234 }; 1235 1236 const int *p; 1237 for (p = table_sizes; *p <= n && *p != 0; p++) 1238 ; 1239 assert(*p != 0); 1240 return *p; 1241} 1242 1243