/*	$NetBSD$	*/

// -*- C++ -*-
/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
   Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */

#include "refer.h"
#include "refid.h"
#include "ref.h"
#include "token.h"
#include "search.h"
#include "command.h"

extern "C" const char *Version_string;

// Control characters that do_file() embeds in pending_line to mark where
// a citation goes; label_processing_state later replaces them with the
// bracket strings and the labels themselves.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Extra bits that make_reference() may set in its flags result, in
// addition to the label type, when the citation text starts with `[' or
// `]'.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010

// Stream that processed text is written to.  It is stdout unless
// divert_to_temporary_file() has replaced it.
static FILE *outfp = stdout;

// Option state.  Most of these are assigned by the command-line parsing
// in main(); they are not static, so other translation units can
// presumably change them as well.
string capitalize_fields;	// argument of -c
string reverse_fields;		// `A' followed by the argument of -a
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;		// argument of -s (or "AD" when -s has none)
int annotation_field = -1;	// field letter given to -B (`X' by default)
string annotation_macro;	// macro name given to -B ("AP" by default)
string discard_fields = "XYZ";
string pre_label = "\\*([.";	// written in place of PRE_LABEL_MARKER
string post_label = "\\*(.]";	// written in place of POST_LABEL_MARKER
string sep_label = ", ";	// written between adjacent labels
int accumulate = 0;		// set by -e and -s
int move_punctuation = 0;	// set by -P
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;		// cleared by -b and -B
int label_in_reference = 1;	// cleared by -b and -B
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
string join_authors_exactly_two = " and ";
// When there are more than two authors join the last two with this.
string join_authors_last_two = ", and ";
// Otherwise join authors with this.
string join_authors_default = ", ";
string separate_label_second_parts = ", ";
// Use this string to represent that there are other authors.
string et_al = " et al";
// Use et al only if it can replace at least this many authors.
int et_al_min_elide = 2;
// Use et al only if the total number of authors is at least this.
76int et_al_min_total = 3; 77 78 79int compatible_flag = 0; 80 81int short_label_flag = 0; 82 83static int recognize_R1_R2 = 1; 84 85search_list database_list; 86int search_default = 1; 87static int default_database_loaded = 0; 88 89static reference **citation = 0; 90static int ncitations = 0; 91static int citation_max = 0; 92 93static reference **reference_hash_table = 0; 94static int hash_table_size; 95static int nreferences = 0; 96 97static int need_syncing = 0; 98string pending_line; 99string pending_lf_lines; 100 101static void output_pending_line(); 102static unsigned immediately_handle_reference(const string &); 103static void immediately_output_references(); 104static unsigned store_reference(const string &); 105static void divert_to_temporary_file(); 106static reference *make_reference(const string &, unsigned *); 107static void usage(FILE *stream); 108static void do_file(const char *); 109static void split_punct(string &line, string &punct); 110static void output_citation_group(reference **v, int n, label_type, FILE *fp); 111static void possibly_load_default_database(); 112 113int main(int argc, char **argv) 114{ 115 program_name = argv[0]; 116 static char stderr_buf[BUFSIZ]; 117 setbuf(stderr, stderr_buf); 118 outfp = stdout; 119 int finished_options = 0; 120 int bib_flag = 0; 121 int done_spec = 0; 122 123 for (--argc, ++argv; 124 !finished_options && argc > 0 && argv[0][0] == '-' 125 && argv[0][1] != '\0'; 126 argv++, argc--) { 127 const char *opt = argv[0] + 1; 128 while (opt != 0 && *opt != '\0') { 129 switch (*opt) { 130 case 'C': 131 compatible_flag = 1; 132 opt++; 133 break; 134 case 'B': 135 bib_flag = 1; 136 label_in_reference = 0; 137 label_in_text = 0; 138 ++opt; 139 if (*opt == '\0') { 140 annotation_field = 'X'; 141 annotation_macro = "AP"; 142 } 143 else if (csalnum(opt[0]) && opt[1] == '.' 
&& opt[2] != '\0') { 144 annotation_field = opt[0]; 145 annotation_macro = opt + 2; 146 } 147 opt = 0; 148 break; 149 case 'P': 150 move_punctuation = 1; 151 opt++; 152 break; 153 case 'R': 154 recognize_R1_R2 = 0; 155 opt++; 156 break; 157 case 'S': 158 // Not a very useful spec. 159 set_label_spec("(A.n|Q)', '(D.y|D)"); 160 done_spec = 1; 161 pre_label = " ("; 162 post_label = ")"; 163 sep_label = "; "; 164 opt++; 165 break; 166 case 'V': 167 verify_flag = 1; 168 opt++; 169 break; 170 case 'f': 171 { 172 const char *num = 0; 173 if (*++opt == '\0') { 174 if (argc > 1) { 175 num = *++argv; 176 --argc; 177 } 178 else { 179 error("option `f' requires an argument"); 180 usage(stderr); 181 exit(1); 182 } 183 } 184 else { 185 num = opt; 186 opt = 0; 187 } 188 const char *ptr; 189 for (ptr = num; *ptr; ptr++) 190 if (!csdigit(*ptr)) { 191 error("bad character `%1' in argument to -f option", *ptr); 192 break; 193 } 194 if (*ptr == '\0') { 195 string spec; 196 spec = '%'; 197 spec += num; 198 spec += '\0'; 199 set_label_spec(spec.contents()); 200 done_spec = 1; 201 } 202 break; 203 } 204 case 'b': 205 label_in_text = 0; 206 label_in_reference = 0; 207 opt++; 208 break; 209 case 'e': 210 accumulate = 1; 211 opt++; 212 break; 213 case 'c': 214 capitalize_fields = ++opt; 215 opt = 0; 216 break; 217 case 'k': 218 { 219 char buf[5]; 220 if (csalpha(*++opt)) 221 buf[0] = *opt++; 222 else { 223 if (*opt != '\0') 224 error("bad field name `%1'", *opt++); 225 buf[0] = 'L'; 226 } 227 buf[1] = '~'; 228 buf[2] = '%'; 229 buf[3] = 'a'; 230 buf[4] = '\0'; 231 set_label_spec(buf); 232 done_spec = 1; 233 } 234 break; 235 case 'a': 236 { 237 const char *ptr; 238 for (ptr = ++opt; *ptr; ptr++) 239 if (!csdigit(*ptr)) { 240 error("argument to `a' option not a number"); 241 break; 242 } 243 if (*ptr == '\0') { 244 reverse_fields = 'A'; 245 reverse_fields += opt; 246 } 247 opt = 0; 248 } 249 break; 250 case 'i': 251 linear_ignore_fields = ++opt; 252 opt = 0; 253 break; 254 case 'l': 255 { 256 
char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a 257 strcpy(buf, "A.n"); 258 if (*++opt != '\0' && *opt != ',') { 259 char *ptr; 260 long n = strtol(opt, &ptr, 10); 261 if (n == 0 && ptr == opt) { 262 error("bad integer `%1' in `l' option", opt); 263 opt = 0; 264 break; 265 } 266 if (n < 0) 267 n = 0; 268 opt = ptr; 269 sprintf(strchr(buf, '\0'), "+%ld", n); 270 } 271 strcat(buf, "D.y"); 272 if (*opt == ',') 273 opt++; 274 if (*opt != '\0') { 275 char *ptr; 276 long n = strtol(opt, &ptr, 10); 277 if (n == 0 && ptr == opt) { 278 error("bad integer `%1' in `l' option", opt); 279 opt = 0; 280 break; 281 } 282 if (n < 0) 283 n = 0; 284 sprintf(strchr(buf, '\0'), "-%ld", n); 285 opt = ptr; 286 if (*opt != '\0') 287 error("argument to `l' option not of form `m,n'"); 288 } 289 strcat(buf, "%a"); 290 if (!set_label_spec(buf)) 291 assert(0); 292 done_spec = 1; 293 } 294 break; 295 case 'n': 296 search_default = 0; 297 opt++; 298 break; 299 case 'p': 300 { 301 const char *filename = 0; 302 if (*++opt == '\0') { 303 if (argc > 1) { 304 filename = *++argv; 305 argc--; 306 } 307 else { 308 error("option `p' requires an argument"); 309 usage(stderr); 310 exit(1); 311 } 312 } 313 else { 314 filename = opt; 315 opt = 0; 316 } 317 database_list.add_file(filename); 318 } 319 break; 320 case 's': 321 if (*++opt == '\0') 322 sort_fields = "AD"; 323 else { 324 sort_fields = opt; 325 opt = 0; 326 } 327 accumulate = 1; 328 break; 329 case 't': 330 { 331 char *ptr; 332 long n = strtol(opt, &ptr, 10); 333 if (n == 0 && ptr == opt) { 334 error("bad integer `%1' in `t' option", opt); 335 opt = 0; 336 break; 337 } 338 if (n < 1) 339 n = 1; 340 linear_truncate_len = int(n); 341 opt = ptr; 342 break; 343 } 344 case '-': 345 if (opt[1] == '\0') { 346 finished_options = 1; 347 opt++; 348 break; 349 } 350 if (strcmp(opt,"-version")==0) { 351 case 'v': 352 printf("GNU refer (groff) version %s\n", Version_string); 353 exit(0); 354 break; 355 } 356 if (strcmp(opt,"-help")==0) { 357 usage(stdout); 358 
exit(0); 359 break; 360 } 361 // fall through 362 default: 363 error("unrecognized option `%1'", *opt); 364 usage(stderr); 365 exit(1); 366 break; 367 } 368 } 369 } 370 if (!done_spec) 371 set_label_spec("%1"); 372 if (argc <= 0) { 373 if (bib_flag) 374 do_bib("-"); 375 else 376 do_file("-"); 377 } 378 else { 379 for (int i = 0; i < argc; i++) { 380 if (bib_flag) 381 do_bib(argv[i]); 382 else 383 do_file(argv[i]); 384 } 385 } 386 if (accumulate) 387 output_references(); 388 if (fflush(stdout) < 0) 389 fatal("output error"); 390 return 0; 391} 392 393static void usage(FILE *stream) 394{ 395 fprintf(stream, 396"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" 397" [-sXYZ] [-tN] [-BL.M] [files ...]\n", 398 program_name); 399} 400 401static void possibly_load_default_database() 402{ 403 if (search_default && !default_database_loaded) { 404 char *filename = getenv("REFER"); 405 if (filename) 406 database_list.add_file(filename); 407 else 408 database_list.add_file(DEFAULT_INDEX, 1); 409 default_database_loaded = 1; 410 } 411} 412 413static int is_list(const string &str) 414{ 415 const char *start = str.contents(); 416 const char *end = start + str.length(); 417 while (end > start && csspace(end[-1])) 418 end--; 419 while (start < end && csspace(*start)) 420 start++; 421 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; 422} 423 424static void do_file(const char *filename) 425{ 426 FILE *fp; 427 if (strcmp(filename, "-") == 0) { 428 fp = stdin; 429 } 430 else { 431 errno = 0; 432 fp = fopen(filename, "r"); 433 if (fp == 0) { 434 error("can't open `%1': %2", filename, strerror(errno)); 435 return; 436 } 437 } 438 current_filename = filename; 439 fprintf(outfp, ".lf 1 %s\n", filename); 440 string line; 441 current_lineno = 0; 442 for (;;) { 443 line.clear(); 444 for (;;) { 445 int c = getc(fp); 446 if (c == EOF) { 447 if (line.length() > 0) 448 line += '\n'; 449 break; 450 } 451 if (invalid_input_char(c)) 452 error("invalid input 
character code %1", c); 453 else { 454 line += c; 455 if (c == '\n') 456 break; 457 } 458 } 459 int len = line.length(); 460 if (len == 0) 461 break; 462 current_lineno++; 463 if (len >= 2 && line[0] == '.' && line[1] == '[') { 464 int start_lineno = current_lineno; 465 int start_of_line = 1; 466 string str; 467 string post; 468 string pre(line.contents() + 2, line.length() - 3); 469 for (;;) { 470 int c = getc(fp); 471 if (c == EOF) { 472 error_with_file_and_line(current_filename, start_lineno, 473 "missing `.]' line"); 474 break; 475 } 476 if (start_of_line) 477 current_lineno++; 478 if (start_of_line && c == '.') { 479 int d = getc(fp); 480 if (d == ']') { 481 while ((d = getc(fp)) != '\n' && d != EOF) { 482 if (invalid_input_char(d)) 483 error("invalid input character code %1", d); 484 else 485 post += d; 486 } 487 break; 488 } 489 if (d != EOF) 490 ungetc(d, fp); 491 } 492 if (invalid_input_char(c)) 493 error("invalid input character code %1", c); 494 else 495 str += c; 496 start_of_line = (c == '\n'); 497 } 498 if (is_list(str)) { 499 output_pending_line(); 500 if (accumulate) 501 output_references(); 502 else 503 error("found `$LIST$' but not accumulating references"); 504 } 505 else { 506 unsigned flags = (accumulate 507 ? 
store_reference(str) 508 : immediately_handle_reference(str)); 509 if (label_in_text) { 510 if (accumulate && outfp == stdout) 511 divert_to_temporary_file(); 512 if (pending_line.length() == 0) { 513 warning("can't attach citation to previous line"); 514 } 515 else 516 pending_line.set_length(pending_line.length() - 1); 517 string punct; 518 if (move_punctuation) 519 split_punct(pending_line, punct); 520 int have_text = pre.length() > 0 || post.length() > 0; 521 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET 522 |FORCE_RIGHT_BRACKET)); 523 if ((flags & FORCE_LEFT_BRACKET) || !have_text) 524 pending_line += PRE_LABEL_MARKER; 525 pending_line += pre; 526 char lm = LABEL_MARKER + (int)lt; 527 pending_line += lm; 528 pending_line += post; 529 if ((flags & FORCE_RIGHT_BRACKET) || !have_text) 530 pending_line += POST_LABEL_MARKER; 531 pending_line += punct; 532 pending_line += '\n'; 533 } 534 } 535 need_syncing = 1; 536 } 537 else if (len >= 4 538 && line[0] == '.' && line[1] == 'l' && line[2] == 'f' 539 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 540 pending_lf_lines += line; 541 line += '\0'; 542 if (interpret_lf_args(line.contents() + 3)) 543 current_lineno--; 544 } 545 else if (recognize_R1_R2 546 && len >= 4 547 && line[0] == '.' 
&& line[1] == 'R' && line[2] == '1' 548 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 549 line.clear(); 550 int start_of_line = 1; 551 int start_lineno = current_lineno; 552 for (;;) { 553 int c = getc(fp); 554 if (c != EOF && start_of_line) 555 current_lineno++; 556 if (start_of_line && c == '.') { 557 c = getc(fp); 558 if (c == 'R') { 559 c = getc(fp); 560 if (c == '2') { 561 c = getc(fp); 562 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 563 while (c != EOF && c != '\n') 564 c = getc(fp); 565 break; 566 } 567 else { 568 line += '.'; 569 line += 'R'; 570 line += '2'; 571 } 572 } 573 else { 574 line += '.'; 575 line += 'R'; 576 } 577 } 578 else 579 line += '.'; 580 } 581 if (c == EOF) { 582 error_with_file_and_line(current_filename, start_lineno, 583 "missing `.R2' line"); 584 break; 585 } 586 if (invalid_input_char(c)) 587 error("invalid input character code %1", int(c)); 588 else { 589 line += c; 590 start_of_line = c == '\n'; 591 } 592 } 593 output_pending_line(); 594 if (accumulate) 595 output_references(); 596 else 597 nreferences = 0; 598 process_commands(line, current_filename, start_lineno + 1); 599 need_syncing = 1; 600 } 601 else { 602 output_pending_line(); 603 pending_line = line; 604 } 605 } 606 need_syncing = 0; 607 output_pending_line(); 608 if (fp != stdin) 609 fclose(fp); 610} 611 612class label_processing_state { 613 enum { 614 NORMAL, 615 PENDING_LABEL, 616 PENDING_LABEL_POST, 617 PENDING_LABEL_POST_PRE, 618 PENDING_POST 619 } state; 620 label_type type; // type of pending labels 621 int count; // number of pending labels 622 reference **rptr; // pointer to next reference 623 int rcount; // number of references left 624 FILE *fp; 625 int handle_pending(int c); 626public: 627 label_processing_state(reference **, int, FILE *); 628 ~label_processing_state(); 629 void process(int c); 630}; 631 632static void output_pending_line() 633{ 634 if (label_in_text && !accumulate && ncitations > 0) { 635 label_processing_state 
state(citation, ncitations, outfp); 636 int len = pending_line.length(); 637 for (int i = 0; i < len; i++) 638 state.process((unsigned char)(pending_line[i])); 639 } 640 else 641 put_string(pending_line, outfp); 642 pending_line.clear(); 643 if (pending_lf_lines.length() > 0) { 644 put_string(pending_lf_lines, outfp); 645 pending_lf_lines.clear(); 646 } 647 if (!accumulate) 648 immediately_output_references(); 649 if (need_syncing) { 650 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); 651 need_syncing = 0; 652 } 653} 654 655static void split_punct(string &line, string &punct) 656{ 657 const char *start = line.contents(); 658 const char *end = start + line.length(); 659 const char *ptr = start; 660 const char *last_token_start = 0; 661 for (;;) { 662 if (ptr >= end) 663 break; 664 last_token_start = ptr; 665 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER 666 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) 667 ptr++; 668 else if (!get_token(&ptr, end)) 669 break; 670 } 671 if (last_token_start) { 672 const token_info *ti = lookup_token(last_token_start, end); 673 if (ti->is_punct()) { 674 punct.append(last_token_start, end - last_token_start); 675 line.set_length(last_token_start - start); 676 } 677 } 678} 679 680static void divert_to_temporary_file() 681{ 682 outfp = xtmpfile(); 683} 684 685static void store_citation(reference *ref) 686{ 687 if (ncitations >= citation_max) { 688 if (citation == 0) 689 citation = new reference*[citation_max = 100]; 690 else { 691 reference **old_citation = citation; 692 citation_max *= 2; 693 citation = new reference *[citation_max]; 694 memcpy(citation, old_citation, ncitations*sizeof(reference *)); 695 a_delete old_citation; 696 } 697 } 698 citation[ncitations++] = ref; 699} 700 701static unsigned store_reference(const string &str) 702{ 703 if (reference_hash_table == 0) { 704 reference_hash_table = new reference *[17]; 705 hash_table_size = 17; 706 for (int i = 0; i < hash_table_size; 
i++) 707 reference_hash_table[i] = 0; 708 } 709 unsigned flags; 710 reference *ref = make_reference(str, &flags); 711 ref->compute_hash_code(); 712 unsigned h = ref->hash(); 713 reference **ptr; 714 for (ptr = reference_hash_table + (h % hash_table_size); 715 *ptr != 0; 716 ((ptr == reference_hash_table) 717 ? (ptr = reference_hash_table + hash_table_size - 1) 718 : --ptr)) 719 if (same_reference(**ptr, *ref)) 720 break; 721 if (*ptr != 0) { 722 if (ref->is_merged()) 723 warning("fields ignored because reference already used"); 724 delete ref; 725 ref = *ptr; 726 } 727 else { 728 *ptr = ref; 729 ref->set_number(nreferences); 730 nreferences++; 731 ref->pre_compute_label(); 732 ref->compute_sort_key(); 733 if (nreferences*2 >= hash_table_size) { 734 // Rehash it. 735 reference **old_table = reference_hash_table; 736 int old_size = hash_table_size; 737 hash_table_size = next_size(hash_table_size); 738 reference_hash_table = new reference*[hash_table_size]; 739 int i; 740 for (i = 0; i < hash_table_size; i++) 741 reference_hash_table[i] = 0; 742 for (i = 0; i < old_size; i++) 743 if (old_table[i]) { 744 reference **p; 745 for (p = (reference_hash_table 746 + (old_table[i]->hash() % hash_table_size)); 747 *p; 748 ((p == reference_hash_table) 749 ? 
(p = reference_hash_table + hash_table_size - 1) 750 : --p)) 751 ; 752 *p = old_table[i]; 753 } 754 a_delete old_table; 755 } 756 } 757 if (label_in_text) 758 store_citation(ref); 759 return flags; 760} 761 762unsigned immediately_handle_reference(const string &str) 763{ 764 unsigned flags; 765 reference *ref = make_reference(str, &flags); 766 ref->set_number(nreferences); 767 if (label_in_text || label_in_reference) { 768 ref->pre_compute_label(); 769 ref->immediate_compute_label(); 770 } 771 nreferences++; 772 store_citation(ref); 773 return flags; 774} 775 776static void immediately_output_references() 777{ 778 for (int i = 0; i < ncitations; i++) { 779 reference *ref = citation[i]; 780 if (label_in_reference) { 781 fputs(".ds [F ", outfp); 782 const string &label = ref->get_label(NORMAL_LABEL); 783 if (label.length() > 0 784 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 785 putc('"', outfp); 786 put_string(label, outfp); 787 putc('\n', outfp); 788 } 789 ref->output(outfp); 790 delete ref; 791 } 792 ncitations = 0; 793} 794 795static void output_citation_group(reference **v, int n, label_type type, 796 FILE *fp) 797{ 798 if (sort_adjacent_labels) { 799 // Do an insertion sort. Usually n will be very small. 800 for (int i = 1; i < n; i++) { 801 int num = v[i]->get_number(); 802 reference *temp = v[i]; 803 int j; 804 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) 805 v[j + 1] = v[j]; 806 v[j + 1] = temp; 807 } 808 } 809 // This messes up if !accumulate. 
810 if (accumulate && n > 1) { 811 // remove duplicates 812 int j = 1; 813 for (int i = 1; i < n; i++) 814 if (v[i]->get_label(type) != v[i - 1]->get_label(type)) 815 v[j++] = v[i]; 816 n = j; 817 } 818 string merged_label; 819 for (int i = 0; i < n; i++) { 820 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); 821 if (nmerged > 0) { 822 put_string(merged_label, fp); 823 i += nmerged; 824 } 825 else 826 put_string(v[i]->get_label(type), fp); 827 if (i < n - 1) 828 put_string(sep_label, fp); 829 } 830} 831 832 833label_processing_state::label_processing_state(reference **p, int n, FILE *f) 834: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) 835{ 836} 837 838label_processing_state::~label_processing_state() 839{ 840 int handled = handle_pending(EOF); 841 assert(!handled); 842 assert(rcount == 0); 843} 844 845int label_processing_state::handle_pending(int c) 846{ 847 switch (state) { 848 case NORMAL: 849 break; 850 case PENDING_LABEL: 851 if (c == POST_LABEL_MARKER) { 852 state = PENDING_LABEL_POST; 853 return 1; 854 } 855 else { 856 output_citation_group(rptr, count, type, fp); 857 rptr += count ; 858 rcount -= count; 859 state = NORMAL; 860 } 861 break; 862 case PENDING_LABEL_POST: 863 if (c == PRE_LABEL_MARKER) { 864 state = PENDING_LABEL_POST_PRE; 865 return 1; 866 } 867 else { 868 output_citation_group(rptr, count, type, fp); 869 rptr += count; 870 rcount -= count; 871 put_string(post_label, fp); 872 state = NORMAL; 873 } 874 break; 875 case PENDING_LABEL_POST_PRE: 876 if (c >= LABEL_MARKER 877 && c < LABEL_MARKER + N_LABEL_TYPES 878 && c - LABEL_MARKER == type) { 879 count += 1; 880 state = PENDING_LABEL; 881 return 1; 882 } 883 else { 884 output_citation_group(rptr, count, type, fp); 885 rptr += count; 886 rcount -= count; 887 put_string(sep_label, fp); 888 state = NORMAL; 889 } 890 break; 891 case PENDING_POST: 892 if (c == PRE_LABEL_MARKER) { 893 put_string(sep_label, fp); 894 state = NORMAL; 895 return 1; 896 } 897 else { 898 
put_string(post_label, fp); 899 state = NORMAL; 900 } 901 break; 902 } 903 return 0; 904} 905 906void label_processing_state::process(int c) 907{ 908 if (handle_pending(c)) 909 return; 910 assert(state == NORMAL); 911 switch (c) { 912 case PRE_LABEL_MARKER: 913 put_string(pre_label, fp); 914 state = NORMAL; 915 break; 916 case POST_LABEL_MARKER: 917 state = PENDING_POST; 918 break; 919 case LABEL_MARKER: 920 case LABEL_MARKER + 1: 921 count = 1; 922 state = PENDING_LABEL; 923 type = label_type(c - LABEL_MARKER); 924 break; 925 default: 926 state = NORMAL; 927 putc(c, fp); 928 break; 929 } 930} 931 932extern "C" { 933 934int rcompare(const void *p1, const void *p2) 935{ 936 return compare_reference(**(reference **)p1, **(reference **)p2); 937} 938 939} 940 941void output_references() 942{ 943 assert(accumulate); 944 if (!hash_table_size) { 945 error("nothing to reference (probably `bibliography' before `sort')"); 946 accumulate = 0; 947 nreferences = 0; 948 return; 949 } 950 if (nreferences > 0) { 951 int j = 0; 952 int i; 953 for (i = 0; i < hash_table_size; i++) 954 if (reference_hash_table[i] != 0) 955 reference_hash_table[j++] = reference_hash_table[i]; 956 assert(j == nreferences); 957 for (; j < hash_table_size; j++) 958 reference_hash_table[j] = 0; 959 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); 960 for (i = 0; i < nreferences; i++) 961 reference_hash_table[i]->set_number(i); 962 compute_labels(reference_hash_table, nreferences); 963 } 964 if (outfp != stdout) { 965 rewind(outfp); 966 { 967 label_processing_state state(citation, ncitations, stdout); 968 int c; 969 while ((c = getc(outfp)) != EOF) 970 state.process(c); 971 } 972 ncitations = 0; 973 fclose(outfp); 974 outfp = stdout; 975 } 976 if (nreferences > 0) { 977 fputs(".]<\n", outfp); 978 for (int i = 0; i < nreferences; i++) { 979 if (sort_fields.length() > 0) 980 reference_hash_table[i]->print_sort_key_comment(outfp); 981 if (label_in_reference) { 982 fputs(".ds [F ", 
outfp); 983 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); 984 if (label.length() > 0 985 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 986 putc('"', outfp); 987 put_string(label, outfp); 988 putc('\n', outfp); 989 } 990 reference_hash_table[i]->output(outfp); 991 delete reference_hash_table[i]; 992 reference_hash_table[i] = 0; 993 } 994 fputs(".]>\n", outfp); 995 nreferences = 0; 996 } 997 clear_labels(); 998} 999 1000static reference *find_reference(const char *query, int query_len) 1001{ 1002 // This is so that error messages look better. 1003 while (query_len > 0 && csspace(query[query_len - 1])) 1004 query_len--; 1005 string str; 1006 for (int i = 0; i < query_len; i++) 1007 str += query[i] == '\n' ? ' ' : query[i]; 1008 str += '\0'; 1009 possibly_load_default_database(); 1010 search_list_iterator iter(&database_list, str.contents()); 1011 reference_id rid; 1012 const char *start; 1013 int len; 1014 if (!iter.next(&start, &len, &rid)) { 1015 error("no matches for `%1'", str.contents()); 1016 return 0; 1017 } 1018 const char *end = start + len; 1019 while (start < end) { 1020 if (*start == '%') 1021 break; 1022 while (start < end && *start++ != '\n') 1023 ; 1024 } 1025 if (start >= end) { 1026 error("found a reference for `%1' but it didn't contain any fields", 1027 str.contents()); 1028 return 0; 1029 } 1030 reference *result = new reference(start, end - start, &rid); 1031 if (iter.next(&start, &len, &rid)) 1032 warning("multiple matches for `%1'", str.contents()); 1033 return result; 1034} 1035 1036static reference *make_reference(const string &str, unsigned *flagsp) 1037{ 1038 const char *start = str.contents(); 1039 const char *end = start + str.length(); 1040 const char *ptr = start; 1041 while (ptr < end) { 1042 if (*ptr == '%') 1043 break; 1044 while (ptr < end && *ptr++ != '\n') 1045 ; 1046 } 1047 *flagsp = 0; 1048 for (; start < ptr; start++) { 1049 if (*start == '#') 1050 *flagsp = (SHORT_LABEL | (*flagsp & 
(FORCE_RIGHT_BRACKET 1051 | FORCE_LEFT_BRACKET))); 1052 else if (*start == '[') 1053 *flagsp |= FORCE_LEFT_BRACKET; 1054 else if (*start == ']') 1055 *flagsp |= FORCE_RIGHT_BRACKET; 1056 else if (!csspace(*start)) 1057 break; 1058 } 1059 if (start >= end) { 1060 error("empty reference"); 1061 return new reference; 1062 } 1063 reference *database_ref = 0; 1064 if (start < ptr) 1065 database_ref = find_reference(start, ptr - start); 1066 reference *inline_ref = 0; 1067 if (ptr < end) 1068 inline_ref = new reference(ptr, end - ptr); 1069 if (inline_ref) { 1070 if (database_ref) { 1071 database_ref->merge(*inline_ref); 1072 delete inline_ref; 1073 return database_ref; 1074 } 1075 else 1076 return inline_ref; 1077 } 1078 else if (database_ref) 1079 return database_ref; 1080 else 1081 return new reference; 1082} 1083 1084static void do_ref(const string &str) 1085{ 1086 if (accumulate) 1087 (void)store_reference(str); 1088 else { 1089 (void)immediately_handle_reference(str); 1090 immediately_output_references(); 1091 } 1092} 1093 1094static void trim_blanks(string &str) 1095{ 1096 const char *start = str.contents(); 1097 const char *end = start + str.length(); 1098 while (end > start && end[-1] != '\n' && csspace(end[-1])) 1099 --end; 1100 str.set_length(end - start); 1101} 1102 1103void do_bib(const char *filename) 1104{ 1105 FILE *fp; 1106 if (strcmp(filename, "-") == 0) 1107 fp = stdin; 1108 else { 1109 errno = 0; 1110 fp = fopen(filename, "r"); 1111 if (fp == 0) { 1112 error("can't open `%1': %2", filename, strerror(errno)); 1113 return; 1114 } 1115 current_filename = filename; 1116 } 1117 enum { 1118 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT 1119 } state = START; 1120 string body; 1121 for (;;) { 1122 int c = getc(fp); 1123 if (c == EOF) 1124 break; 1125 if (invalid_input_char(c)) { 1126 error("invalid input character code %1", c); 1127 continue; 1128 } 1129 switch (state) { 1130 case START: 1131 if (c == '%') { 1132 body = c; 1133 state = BODY; 1134 } 
1135 else if (c != '\n') 1136 state = MIDDLE; 1137 break; 1138 case MIDDLE: 1139 if (c == '\n') 1140 state = START; 1141 break; 1142 case BODY: 1143 body += c; 1144 if (c == '\n') 1145 state = BODY_START; 1146 break; 1147 case BODY_START: 1148 if (c == '\n') { 1149 do_ref(body); 1150 state = START; 1151 } 1152 else if (c == '.') 1153 state = BODY_DOT; 1154 else if (csspace(c)) { 1155 state = BODY_BLANK; 1156 body += c; 1157 } 1158 else { 1159 body += c; 1160 state = BODY; 1161 } 1162 break; 1163 case BODY_BLANK: 1164 if (c == '\n') { 1165 trim_blanks(body); 1166 do_ref(body); 1167 state = START; 1168 } 1169 else if (csspace(c)) 1170 body += c; 1171 else { 1172 body += c; 1173 state = BODY; 1174 } 1175 break; 1176 case BODY_DOT: 1177 if (c == ']') { 1178 do_ref(body); 1179 state = MIDDLE; 1180 } 1181 else { 1182 body += '.'; 1183 body += c; 1184 state = c == '\n' ? BODY_START : BODY; 1185 } 1186 break; 1187 default: 1188 assert(0); 1189 } 1190 if (c == '\n') 1191 current_lineno++; 1192 } 1193 switch (state) { 1194 case START: 1195 case MIDDLE: 1196 break; 1197 case BODY: 1198 body += '\n'; 1199 do_ref(body); 1200 break; 1201 case BODY_DOT: 1202 case BODY_START: 1203 do_ref(body); 1204 break; 1205 case BODY_BLANK: 1206 trim_blanks(body); 1207 do_ref(body); 1208 break; 1209 } 1210 fclose(fp); 1211} 1212 1213// from the Dragon Book 1214 1215unsigned hash_string(const char *s, int len) 1216{ 1217 const char *end = s + len; 1218 unsigned h = 0, g; 1219 while (s < end) { 1220 h <<= 4; 1221 h += *s++; 1222 if ((g = h & 0xf0000000) != 0) { 1223 h ^= g >> 24; 1224 h ^= g; 1225 } 1226 } 1227 return h; 1228} 1229 1230int next_size(int n) 1231{ 1232 static const int table_sizes[] = { 1233 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, 1234 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, 1235 16000057, 32000011, 64000031, 128000003, 0 1236 }; 1237 1238 const int *p; 1239 for (p = table_sizes; *p <= n && *p != 0; p++) 1240 ; 1241 assert(*p != 0); 1242 
return *p; 1243} 1244 1245