1151912Sphk/* $NetBSD$ */ 2151912Sphk 3209440Smav// -*- C++ -*- 4151912Sphk/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004 5151912Sphk Free Software Foundation, Inc. 6151912Sphk Written by James Clark (jjc@jclark.com) 7151912Sphk 8151912SphkThis file is part of groff. 9151912Sphk 10151912Sphkgroff is free software; you can redistribute it and/or modify it under 11151912Sphkthe terms of the GNU General Public License as published by the Free 12151912SphkSoftware Foundation; either version 2, or (at your option) any later 13151912Sphkversion. 14151912Sphk 15151912Sphkgroff is distributed in the hope that it will be useful, but WITHOUT ANY 16151912SphkWARRANTY; without even the implied warranty of MERCHANTABILITY or 17151912SphkFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18151912Sphkfor more details. 19151912Sphk 20151912SphkYou should have received a copy of the GNU General Public License along 21151912Sphkwith groff; see the file COPYING. If not, write to the Free Software 22151912SphkFoundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 23151912Sphk 24151912Sphk#include "refer.h" 25151912Sphk#include "refid.h" 26151912Sphk#include "ref.h" 27151912Sphk#include "token.h" 28151912Sphk#include "search.h" 29151912Sphk#include "command.h" 30151912Sphk 31151912Sphkextern "C" const char *Version_string; 32209402Smav 33209371Smavconst char PRE_LABEL_MARKER = '\013'; 34209371Smavconst char POST_LABEL_MARKER = '\014'; 35209371Smavconst char LABEL_MARKER = '\015'; // label_type is added on 36209371Smav 37151912Sphk#define FORCE_LEFT_BRACKET 04 38159217Snjl#define FORCE_RIGHT_BRACKET 010 39151912Sphk 40151912Sphkstatic FILE *outfp = stdout; 41209371Smav 42151912Sphkstring capitalize_fields; 43151912Sphkstring reverse_fields; 44209371Smavstring abbreviate_fields; 45209371Smavstring period_before_last_name = ". "; 46209371Smavstring period_before_initial = "."; 47151912Sphkstring period_before_hyphen = ""; 48159217Snjlstring period_before_other = ". "; 49193530Sjkimstring sort_fields; 50193530Sjkimint annotation_field = -1; 51193530Sjkimstring annotation_macro; 52151912Sphkstring discard_fields = "XYZ"; 53175385Sjhbstring pre_label = "\\*([."; 54151912Sphkstring post_label = "\\*(.]"; 55209371Smavstring sep_label = ", "; 56209371Smavint accumulate = 0; 57209371Smavint move_punctuation = 0; 58209371Smavint abbreviate_label_ranges = 0; 59203062Savgstring label_range_indicator; 60203062Savgint label_in_text = 1; 61203062Savgint label_in_reference = 1; 62151912Sphkint date_as_label = 0; 63151912Sphkint sort_adjacent_labels = 0; 64209371Smav// Join exactly two authors with this. 65169574Stakawatastring join_authors_exactly_two = " and "; 66151931Sscottl// When there are more than two authors join the last two with this. 67151935Sscottlstring join_authors_last_two = ", and "; 68151931Sscottl// Otherwise join authors with this. 69151931Sscottlstring join_authors_default = ", "; 70209371Smavstring separate_label_second_parts = ", "; 71151912Sphk// Use this string to represent that there are other authors. 72209371Smavstring et_al = " et al"; 73209371Smav// Use et al only if it can replace at least this many authors. 74209371Smavint et_al_min_elide = 2; 75209371Smav// Use et al only if the total number of authors is at least this. 76209440Smavint et_al_min_total = 3; 77159217Snjl 78209371Smav 79209371Smavint compatible_flag = 0; 80151912Sphk 81209371Smavint short_label_flag = 0; 82209440Smav 83209371Smavstatic int recognize_R1_R2 = 1; 84209371Smav 85209371Smavsearch_list database_list; 86209371Smavint search_default = 1; 87209371Smavstatic int default_database_loaded = 0; 88209371Smav 89209371Smavstatic reference **citation = 0; 90209371Smavstatic int ncitations = 0; 91209371Smavstatic int citation_max = 0; 92209371Smav 93209371Smavstatic reference **reference_hash_table = 0; 94209371Smavstatic int hash_table_size; 95209371Smavstatic int nreferences = 0; 96209371Smav 97209371Smavstatic int need_syncing = 0; 98209371Smavstring pending_line; 99209371Smavstring pending_lf_lines; 100209371Smav 101209371Smavstatic void output_pending_line(); 102151912Sphkstatic unsigned immediately_handle_reference(const string &); 103151912Sphkstatic void immediately_output_references(); 104159217Snjlstatic unsigned store_reference(const string &); 105209371Smavstatic void divert_to_temporary_file(); 106151912Sphkstatic reference *make_reference(const string &, unsigned *); 107159217Snjlstatic void usage(FILE *stream); 108159217Snjlstatic void do_file(const char *); 109159217Snjlstatic void split_punct(string &line, string &punct); 110151912Sphkstatic void output_citation_group(reference **v, int n, label_type, FILE *fp); 111151912Sphkstatic void possibly_load_default_database(); 112209371Smav 113151912Sphkint main(int argc, char **argv) 114151912Sphk{ 115175385Sjhb program_name = argv[0]; 116151912Sphk static char stderr_buf[BUFSIZ]; 117151912Sphk setbuf(stderr, stderr_buf); 118175361Sjhb outfp = stdout; 119209371Smav int finished_options = 0; 120175361Sjhb int bib_flag = 0; 121175361Sjhb int done_spec = 0; 122175385Sjhb 123175385Sjhb for (--argc, ++argv; 124209440Smav !finished_options && argc > 0 && argv[0][0] == '-' 125209440Smav && argv[0][1] != '\0'; 126209440Smav argv++, argc--) { 127209440Smav const char *opt = argv[0] + 1; 128185103Sjkim while (opt != 0 && *opt != '\0') { 129185103Sjkim switch (*opt) { 130175361Sjhb case 'C': 131175361Sjhb compatible_flag = 1; 132175361Sjhb opt++; 133209371Smav break; 134175361Sjhb case 'B': 135175361Sjhb bib_flag = 1; 136175385Sjhb label_in_reference = 0; 137175385Sjhb label_in_text = 0; 138185103Sjkim ++opt; 139185103Sjkim if (*opt == '\0') { 140175361Sjhb annotation_field = 'X'; 141175361Sjhb annotation_macro = "AP"; 142209371Smav } 143209371Smav else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { 144209371Smav annotation_field = opt[0]; 145209371Smav annotation_macro = opt + 2; 146209371Smav } 147209371Smav opt = 0; 148209371Smav break; 149209371Smav case 'P': 150209371Smav move_punctuation = 1; 151209371Smav opt++; 152209371Smav break; 153209371Smav case 'R': 154209371Smav recognize_R1_R2 = 0; 155209371Smav opt++; 156209371Smav break; 157209371Smav case 'S': 158209371Smav // Not a very useful spec. 159209371Smav set_label_spec("(A.n|Q)', '(D.y|D)"); 160209371Smav done_spec = 1; 161210290Smav pre_label = " ("; 162210290Smav post_label = ")"; 163210290Smav sep_label = "; "; 164210290Smav opt++; 165210290Smav break; 166210290Smav case 'V': 167209371Smav verify_flag = 1; 168209371Smav opt++; 169209371Smav break; 170209371Smav case 'f': 171209371Smav { 172209371Smav const char *num = 0; 173209371Smav if (*++opt == '\0') { 174209371Smav if (argc > 1) { 175209371Smav num = *++argv; 176209371Smav --argc; 177209371Smav } 178209371Smav else { 179209371Smav error("option `f' requires an argument"); 180209371Smav usage(stderr); 181209371Smav exit(1); 182209371Smav } 183209371Smav } 184209371Smav else { 185209371Smav num = opt; 186209371Smav opt = 0; 187209371Smav } 188209371Smav const char *ptr; 189209371Smav for (ptr = num; *ptr; ptr++) 190209371Smav if (!csdigit(*ptr)) { 191209371Smav error("bad character `%1' in argument to -f option", *ptr); 192209371Smav break; 193209371Smav } 194209371Smav if (*ptr == '\0') { 195209371Smav string spec; 196209371Smav spec = '%'; 197209371Smav spec += num; 198209371Smav spec += '\0'; 199209371Smav set_label_spec(spec.contents()); 200209371Smav done_spec = 1; 201209371Smav } 202209371Smav break; 203209371Smav } 204209371Smav case 'b': 205209371Smav label_in_text = 0; 206209371Smav label_in_reference = 0; 207209371Smav opt++; 208209371Smav break; 209209371Smav case 'e': 210209371Smav accumulate = 1; 211209371Smav opt++; 212209371Smav break; 213209371Smav case 'c': 214209371Smav capitalize_fields = ++opt; 215209371Smav opt = 0; 216209371Smav break; 217209371Smav case 'k': 218209371Smav { 219209371Smav char buf[5]; 220209990Smav if (csalpha(*++opt)) 221209990Smav buf[0] = *opt++; 222209371Smav else { 223209371Smav if (*opt != '\0') 224209371Smav error("bad field name `%1'", *opt++); 225209371Smav buf[0] = 'L'; 226209371Smav } 227209371Smav buf[1] = '~'; 228209371Smav buf[2] = '%'; 229209371Smav buf[3] = 'a'; 230209371Smav buf[4] = '\0'; 231209371Smav set_label_spec(buf); 232209371Smav done_spec = 1; 233209371Smav } 234209371Smav break; 235209371Smav case 'a': 236209371Smav { 237209371Smav const char *ptr; 238209371Smav for (ptr = ++opt; *ptr; ptr++) 239209371Smav if (!csdigit(*ptr)) { 240209371Smav error("argument to `a' option not a number"); 241209371Smav break; 242209371Smav } 243209371Smav if (*ptr == '\0') { 244209371Smav reverse_fields = 'A'; 245209371Smav reverse_fields += opt; 246208436Smav } 247209371Smav opt = 0; 248208436Smav } 249208436Smav break; 250208436Smav case 'i': 251208436Smav linear_ignore_fields = ++opt; 252208438Smav opt = 0; 253208436Smav break; 254208436Smav case 'l': 255208436Smav { 256208436Smav char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a 257208436Smav strcpy(buf, "A.n"); 258208436Smav if (*++opt != '\0' && *opt != ',') { 259208436Smav char *ptr; 260209371Smav long n = strtol(opt, &ptr, 10); 261209371Smav if (n == 0 && ptr == opt) { 262208436Smav error("bad integer `%1' in `l' option", opt); 263208436Smav opt = 0; 264208436Smav break; 265208436Smav } 266169592Snjl if (n < 0) 267172489Snjl n = 0; 268209371Smav opt = ptr; 269169574Stakawata sprintf(strchr(buf, '\0'), "+%ld", n); 270169574Stakawata } 271169574Stakawata strcat(buf, "D.y"); 272169574Stakawata if (*opt == ',') 273208436Smav opt++; 274169574Stakawata if (*opt != '\0') { 275172489Snjl char *ptr; 276209371Smav long n = strtol(opt, &ptr, 10); 277172489Snjl if (n == 0 && ptr == opt) { 278208436Smav error("bad integer `%1' in `l' option", opt); 279208436Smav opt = 0; 280208436Smav break; 281208436Smav } 282208436Smav if (n < 0) 283208436Smav n = 0; 284208436Smav sprintf(strchr(buf, '\0'), "-%ld", n); 285208436Smav opt = ptr; 286209371Smav if (*opt != '\0') 287208436Smav error("argument to `l' option not of form `m,n'"); 288208436Smav } 289208436Smav strcat(buf, "%a"); 290208436Smav if (!set_label_spec(buf)) 291209371Smav assert(0); 292208436Smav done_spec = 1; 293208436Smav } 294208436Smav break; 295208436Smav case 'n': 296208436Smav search_default = 0; 297208436Smav opt++; 298169574Stakawata break; 299169574Stakawata case 'p': 300169574Stakawata { 301151912Sphk const char *filename = 0; 302209371Smav if (*++opt == '\0') { 303151912Sphk if (argc > 1) { 304159217Snjl filename = *++argv; 305159217Snjl argc--; 306169592Snjl } 307151912Sphk else { 308199016Savg error("option `p' requires an argument"); 309208436Smav usage(stderr); 310169592Snjl exit(1); 311151912Sphk } 312159217Snjl } 313151912Sphk else { 314151912Sphk filename = opt; 315151912Sphk opt = 0; 316151912Sphk } 317209371Smav database_list.add_file(filename); 318151912Sphk } 319209371Smav break; 320209371Smav case 's': 321209371Smav if (*++opt == '\0') 322209371Smav sort_fields = "AD"; 323209371Smav else { 324209371Smav sort_fields = opt; 325209371Smav opt = 0; 326151912Sphk } 327151912Sphk accumulate = 1; 328151912Sphk break; 329151912Sphk case 't': 330151912Sphk { 331151912Sphk char *ptr; 332151912Sphk long n = strtol(opt, &ptr, 10); 333209371Smav if (n == 0 && ptr == opt) { 334209371Smav error("bad integer `%1' in `t' option", opt); 335159217Snjl opt = 0; 336159217Snjl break; 337159217Snjl } 338151912Sphk if (n < 1) 339159217Snjl n = 1; 340159217Snjl linear_truncate_len = int(n); 341159217Snjl opt = ptr; 342159217Snjl break; 343159217Snjl } 344159217Snjl case '-': 345159217Snjl if (opt[1] == '\0') { 346151912Sphk finished_options = 1; 347171547Snjl opt++; 348175361Sjhb break; 349171547Snjl } 350159217Snjl if (strcmp(opt,"-version")==0) { 351175385Sjhb case 'v': 352175361Sjhb printf("GNU refer (groff) version %s\n", Version_string); 353175361Sjhb exit(0); 354175361Sjhb break; 355175361Sjhb } 356175361Sjhb if (strcmp(opt,"-help")==0) { 357175361Sjhb usage(stdout); 358175361Sjhb exit(0); 359209371Smav break; 360209440Smav } 361209440Smav // fall through 362209440Smav default: 363209440Smav error("unrecognized option `%1'", *opt); 364209371Smav usage(stderr); 365209371Smav exit(1); 366209371Smav break; 367209371Smav } 368209371Smav } 369209371Smav } 370209371Smav if (!done_spec) 371209371Smav set_label_spec("%1"); 372209371Smav if (argc <= 0) { 373159217Snjl if (bib_flag) 374159217Snjl do_bib("-"); 375209371Smav else 376209440Smav do_file("-"); 377209440Smav } 378209440Smav else { 379209440Smav for (int i = 0; i < argc; i++) { 380159217Snjl if (bib_flag) 381209371Smav do_bib(argv[i]); 382209371Smav else 383209371Smav do_file(argv[i]); 384209371Smav } 385209371Smav } 386209371Smav if (accumulate) 387209371Smav output_references(); 388209371Smav if (fflush(stdout) < 0) 389209371Smav fatal("output error"); 390209371Smav return 0; 391209371Smav} 392209371Smav 393209371Smavstatic void usage(FILE *stream) 394209371Smav{ 395209371Smav fprintf(stream, 396209371Smav"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" 397209371Smav" [-sXYZ] [-tN] [-BL.M] [files ...]\n", 398209371Smav program_name); 399209371Smav} 400159217Snjl 401209371Smavstatic void possibly_load_default_database() 402171547Snjl{ 403171547Snjl if (search_default && !default_database_loaded) { 404171547Snjl char *filename = getenv("REFER"); 405171547Snjl if (filename) 406175385Sjhb database_list.add_file(filename); 407171547Snjl else 408175385Sjhb database_list.add_file(DEFAULT_INDEX, 1); 409171547Snjl default_database_loaded = 1; 410171547Snjl } 411175361Sjhb} 412171547Snjl 413171547Snjlstatic int is_list(const string &str) 414171547Snjl{ 415208436Smav const char *start = str.contents(); 416208436Smav const char *end = start + str.length(); 417209371Smav while (end > start && csspace(end[-1])) 418209371Smav end--; 419209371Smav while (start < end && csspace(*start)) 420209371Smav start++; 421209371Smav return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; 422209371Smav} 423209371Smav 424208436Smavstatic void do_file(const char *filename) 425209371Smav{ 426209371Smav FILE *fp; 427209371Smav if (strcmp(filename, "-") == 0) { 428209371Smav fp = stdin; 429209440Smav } 430209440Smav else { 431209440Smav errno = 0; 432209440Smav fp = fopen(filename, "r"); 433209440Smav if (fp == 0) { 434209440Smav error("can't open `%1': %2", filename, strerror(errno)); 435209440Smav return; 436209440Smav } 437209440Smav } 438209440Smav current_filename = filename; 439209440Smav fprintf(outfp, ".lf 1 %s\n", filename); 440209440Smav string line; 441209440Smav current_lineno = 0; 442209440Smav for (;;) { 443209371Smav line.clear(); 444209371Smav for (;;) { 445209371Smav int c = getc(fp); 446209371Smav if (c == EOF) { 447209371Smav if (line.length() > 0) 448209371Smav line += '\n'; 449209371Smav break; 450209371Smav } 451209371Smav if (invalid_input_char(c)) 452209371Smav error("invalid input character code %1", c); 453209371Smav else { 454209371Smav line += c; 455209371Smav if (c == '\n') 456209440Smav break; 457209440Smav } 458209371Smav } 459209440Smav int len = line.length(); 460209371Smav if (len == 0) 461209371Smav break; 462209371Smav current_lineno++; 463209371Smav if (len >= 2 && line[0] == '.' && line[1] == '[') { 464209440Smav int start_lineno = current_lineno; 465209440Smav int start_of_line = 1; 466209440Smav string str; 467209440Smav string post; 468209440Smav string pre(line.contents() + 2, line.length() - 3); 469209440Smav for (;;) { 470209371Smav int c = getc(fp); 471209440Smav if (c == EOF) { 472209440Smav error_with_file_and_line(current_filename, start_lineno, 473209440Smav "missing `.]' line"); 474209440Smav break; 475209371Smav } 476209371Smav if (start_of_line) 477209371Smav current_lineno++; 478209371Smav if (start_of_line && c == '.') { 479209440Smav int d = getc(fp); 480209440Smav if (d == ']') { 481209440Smav while ((d = getc(fp)) != '\n' && d != EOF) { 482209371Smav if (invalid_input_char(d)) 483209371Smav error("invalid input character code %1", d); 484209371Smav else 485209371Smav post += d; 486209371Smav } 487209440Smav break; 488209440Smav } 489209371Smav if (d != EOF) 490209371Smav ungetc(d, fp); 491209371Smav } 492209371Smav if (invalid_input_char(c)) 493209440Smav error("invalid input character code %1", c); 494209440Smav else 495209440Smav str += c; 496209440Smav start_of_line = (c == '\n'); 497209440Smav } 498209440Smav if (is_list(str)) { 499209440Smav output_pending_line(); 500209440Smav if (accumulate) 501209440Smav output_references(); 502209440Smav else 503209440Smav error("found `$LIST$' but not accumulating references"); 504209440Smav } 505209440Smav else { 506209440Smav unsigned flags = (accumulate 507209440Smav ? store_reference(str) 508209440Smav : immediately_handle_reference(str)); 509209440Smav if (label_in_text) { 510209440Smav if (accumulate && outfp == stdout) 511209440Smav divert_to_temporary_file(); 512209440Smav if (pending_line.length() == 0) { 513209440Smav warning("can't attach citation to previous line"); 514209440Smav } 515209371Smav else 516209371Smav pending_line.set_length(pending_line.length() - 1); 517209371Smav string punct; 518209371Smav if (move_punctuation) 519209371Smav split_punct(pending_line, punct); 520209371Smav int have_text = pre.length() > 0 || post.length() > 0; 521209371Smav label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET 522209371Smav |FORCE_RIGHT_BRACKET)); 523209371Smav if ((flags & FORCE_LEFT_BRACKET) || !have_text) 524209371Smav pending_line += PRE_LABEL_MARKER; 525209371Smav pending_line += pre; 526209371Smav char lm = LABEL_MARKER + (int)lt; 527209371Smav pending_line += lm; 528209371Smav pending_line += post; 529209371Smav if ((flags & FORCE_RIGHT_BRACKET) || !have_text) 530209371Smav pending_line += POST_LABEL_MARKER; 531209371Smav pending_line += punct; 532209371Smav pending_line += '\n'; 533209371Smav } 534209371Smav } 535209371Smav need_syncing = 1; 536209371Smav } 537209371Smav else if (len >= 4 538209371Smav && line[0] == '.' && line[1] == 'l' && line[2] == 'f' 539209371Smav && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 540209371Smav pending_lf_lines += line; 541209371Smav line += '\0'; 542209440Smav if (interpret_lf_args(line.contents() + 3)) 543209371Smav current_lineno--; 544209440Smav } 545209440Smav else if (recognize_R1_R2 546209440Smav && len >= 4 547209371Smav && line[0] == '.' && line[1] == 'R' && line[2] == '1' 548209371Smav && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 549209371Smav line.clear(); 550209371Smav int start_of_line = 1; 551209371Smav int start_lineno = current_lineno; 552209371Smav for (;;) { 553209371Smav int c = getc(fp); 554209371Smav if (c != EOF && start_of_line) 555209371Smav current_lineno++; 556209371Smav if (start_of_line && c == '.') { 557209371Smav c = getc(fp); 558209371Smav if (c == 'R') { 559209371Smav c = getc(fp); 560209371Smav if (c == '2') { 561209371Smav c = getc(fp); 562209371Smav if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 563209371Smav while (c != EOF && c != '\n') 564209431Smav c = getc(fp); 565209371Smav break; 566209371Smav } 567209371Smav else { 568209371Smav line += '.'; 569209371Smav line += 'R'; 570209371Smav line += '2'; 571209371Smav } 572209371Smav } 573209371Smav else { 574209371Smav line += '.'; 575209371Smav line += 'R'; 576209371Smav } 577209371Smav } 578209371Smav else 579209371Smav line += '.'; 580209371Smav } 581209371Smav if (c == EOF) { 582209371Smav error_with_file_and_line(current_filename, start_lineno, 583209371Smav "missing `.R2' line"); 584209371Smav break; 585209371Smav } 586209371Smav if (invalid_input_char(c)) 587210290Smav error("invalid input character code %1", int(c)); 588210298Smav else { 589210298Smav line += c; 590210290Smav start_of_line = c == '\n'; 591210298Smav } 592209371Smav } 593209371Smav output_pending_line(); 594209371Smav if (accumulate) 595209371Smav output_references(); 596209371Smav else 597209371Smav nreferences = 0; 598209371Smav process_commands(line, current_filename, start_lineno + 1); 599209371Smav need_syncing = 1; 600159217Snjl } 601159217Snjl else { 602159217Snjl output_pending_line(); 603159217Snjl pending_line = line; 604209371Smav } 605159217Snjl } 606159217Snjl need_syncing = 0; 607159217Snjl output_pending_line(); 608159217Snjl if (fp != stdin) 609159217Snjl fclose(fp); 610159217Snjl} 611159217Snjl 612168010Snjlclass label_processing_state { 613209371Smav enum { 614175361Sjhb NORMAL, 615209371Smav PENDING_LABEL, 616175361Sjhb PENDING_LABEL_POST, 617175361Sjhb PENDING_LABEL_POST_PRE, 618175361Sjhb PENDING_POST 619175361Sjhb } state; 620175361Sjhb label_type type; // type of pending labels 621175361Sjhb int count; // number of pending labels 622175361Sjhb reference **rptr; // pointer to next reference 623175361Sjhb int rcount; // number of references left 624175361Sjhb FILE *fp; 625175361Sjhb int handle_pending(int c); 626175361Sjhbpublic: 627175361Sjhb label_processing_state(reference **, int, FILE *); 628175361Sjhb ~label_processing_state(); 629209371Smav void process(int c); 630168010Snjl}; 631209371Smav 632209371Smavstatic void output_pending_line() 633209371Smav{ 634168010Snjl if (label_in_text && !accumulate && ncitations > 0) { 635168010Snjl label_processing_state state(citation, ncitations, outfp); 636168010Snjl int len = pending_line.length(); 637175361Sjhb for (int i = 0; i < len; i++) 638209371Smav state.process((unsigned char)(pending_line[i])); 639209371Smav } 640209371Smav else 641209371Smav put_string(pending_line, outfp); 642209440Smav pending_line.clear(); 643209371Smav if (pending_lf_lines.length() > 0) { 644209371Smav put_string(pending_lf_lines, outfp); 645209371Smav pending_lf_lines.clear(); 646209371Smav } 647209371Smav if (!accumulate) 648209371Smav immediately_output_references(); 649209371Smav if (need_syncing) { 650209371Smav fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); 651209371Smav need_syncing = 0; 652209371Smav } 653209371Smav} 654209371Smav 655209371Smavstatic void split_punct(string &line, string &punct) 656209371Smav{ 657209371Smav const char *start = line.contents(); 658209371Smav const char *end = start + line.length(); 659209371Smav const char *ptr = start; 660209371Smav const char *last_token_start = 0; 661209371Smav for (;;) { 662209371Smav if (ptr >= end) 663209371Smav break; 664209371Smav last_token_start = ptr; 665209371Smav if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER 666209371Smav || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) 667209371Smav ptr++; 668209371Smav else if (!get_token(&ptr, end)) 669209371Smav break; 670209371Smav } 671209371Smav if (last_token_start) { 672209371Smav const token_info *ti = lookup_token(last_token_start, end); 673209371Smav if (ti->is_punct()) { 674209371Smav punct.append(last_token_start, end - last_token_start); 675168010Snjl line.set_length(last_token_start - start); 676168010Snjl } 677168010Snjl } 678159217Snjl} 679159217Snjl 680209371Smavstatic void divert_to_temporary_file() 681159217Snjl{ 682151912Sphk outfp = xtmpfile(); 683151912Sphk} 684151912Sphk 685151912Sphkstatic void store_citation(reference *ref) 686151912Sphk{ 687151912Sphk if (ncitations >= citation_max) { 688151912Sphk if (citation == 0) 689151912Sphk citation = new reference*[citation_max = 100]; 690175385Sjhb else { 691151912Sphk reference **old_citation = citation; 692175385Sjhb citation_max *= 2; 693151912Sphk citation = new reference *[citation_max]; 694175385Sjhb memcpy(citation, old_citation, ncitations*sizeof(reference *)); 695151912Sphk a_delete old_citation; 696151912Sphk } 697151912Sphk } 698151912Sphk citation[ncitations++] = ref; 699151912Sphk} 700151912Sphk 701159217Snjlstatic unsigned store_reference(const string &str) 702151912Sphk{ 703151912Sphk if (reference_hash_table == 0) { 704159217Snjl reference_hash_table = new reference *[17]; 705151912Sphk hash_table_size = 17; 706151912Sphk for (int i = 0; i < hash_table_size; i++) 707209371Smav reference_hash_table[i] = 0; 708209371Smav } 709209371Smav unsigned flags; 710209371Smav reference *ref = make_reference(str, &flags); 711209371Smav ref->compute_hash_code(); 712209371Smav unsigned h = ref->hash(); 713209371Smav reference **ptr; 714209371Smav for (ptr = reference_hash_table + (h % hash_table_size); 715209371Smav *ptr != 0; 716209371Smav ((ptr == reference_hash_table) 717209371Smav ? (ptr = reference_hash_table + hash_table_size - 1) 718209371Smav : --ptr)) 719209371Smav if (same_reference(**ptr, *ref)) 720209371Smav break; 721209371Smav if (*ptr != 0) { 722209371Smav if (ref->is_merged()) 723209371Smav warning("fields ignored because reference already used"); 724209371Smav delete ref; 725209371Smav ref = *ptr; 726209371Smav } 727209371Smav else { 728209371Smav *ptr = ref; 729209371Smav ref->set_number(nreferences); 730209371Smav nreferences++; 731209371Smav ref->pre_compute_label(); 732209371Smav ref->compute_sort_key(); 733209371Smav if (nreferences*2 >= hash_table_size) { 734209371Smav // Rehash it. 735209371Smav reference **old_table = reference_hash_table; 736209371Smav int old_size = hash_table_size; 737151912Sphk hash_table_size = next_size(hash_table_size); 738209371Smav reference_hash_table = new reference*[hash_table_size]; 739209371Smav int i; 740209371Smav for (i = 0; i < hash_table_size; i++) 741209371Smav reference_hash_table[i] = 0; 742209371Smav for (i = 0; i < old_size; i++) 743209371Smav if (old_table[i]) { 744151912Sphk reference **p; 745209371Smav for (p = (reference_hash_table 746209371Smav + (old_table[i]->hash() % hash_table_size)); 747209371Smav *p; 748209371Smav ((p == reference_hash_table) 749151912Sphk ? (p = reference_hash_table + hash_table_size - 1) 750151912Sphk : --p)) 751151912Sphk ; 752209371Smav *p = old_table[i]; 753209371Smav } 754209371Smav a_delete old_table; 755209371Smav } 756151912Sphk } 757151912Sphk if (label_in_text) 758209371Smav store_citation(ref); 759209371Smav return flags; 760} 761 762unsigned immediately_handle_reference(const string &str) 763{ 764 unsigned flags; 765 reference *ref = make_reference(str, &flags); 766 ref->set_number(nreferences); 767 if (label_in_text || label_in_reference) { 768 ref->pre_compute_label(); 769 ref->immediate_compute_label(); 770 } 771 nreferences++; 772 store_citation(ref); 773 return flags; 774} 775 776static void immediately_output_references() 777{ 778 for (int i = 0; i < ncitations; i++) { 779 reference *ref = citation[i]; 780 if (label_in_reference) { 781 fputs(".ds [F ", outfp); 782 const string &label = ref->get_label(NORMAL_LABEL); 783 if (label.length() > 0 784 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 785 putc('"', outfp); 786 put_string(label, outfp); 787 putc('\n', outfp); 788 } 789 ref->output(outfp); 790 delete ref; 791 } 792 ncitations = 0; 793} 794 795static void output_citation_group(reference **v, int n, label_type type, 796 FILE *fp) 797{ 798 if (sort_adjacent_labels) { 799 // Do an insertion sort. Usually n will be very small. 800 for (int i = 1; i < n; i++) { 801 int num = v[i]->get_number(); 802 reference *temp = v[i]; 803 int j; 804 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) 805 v[j + 1] = v[j]; 806 v[j + 1] = temp; 807 } 808 } 809 // This messes up if !accumulate. 810 if (accumulate && n > 1) { 811 // remove duplicates 812 int j = 1; 813 for (int i = 1; i < n; i++) 814 if (v[i]->get_label(type) != v[i - 1]->get_label(type)) 815 v[j++] = v[i]; 816 n = j; 817 } 818 string merged_label; 819 for (int i = 0; i < n; i++) { 820 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); 821 if (nmerged > 0) { 822 put_string(merged_label, fp); 823 i += nmerged; 824 } 825 else 826 put_string(v[i]->get_label(type), fp); 827 if (i < n - 1) 828 put_string(sep_label, fp); 829 } 830} 831 832 833label_processing_state::label_processing_state(reference **p, int n, FILE *f) 834: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) 835{ 836} 837 838label_processing_state::~label_processing_state() 839{ 840 int handled = handle_pending(EOF); 841 assert(!handled); 842 assert(rcount == 0); 843} 844 845int label_processing_state::handle_pending(int c) 846{ 847 switch (state) { 848 case NORMAL: 849 break; 850 case PENDING_LABEL: 851 if (c == POST_LABEL_MARKER) { 852 state = PENDING_LABEL_POST; 853 return 1; 854 } 855 else { 856 output_citation_group(rptr, count, type, fp); 857 rptr += count ; 858 rcount -= count; 859 state = NORMAL; 860 } 861 break; 862 case PENDING_LABEL_POST: 863 if (c == PRE_LABEL_MARKER) { 864 state = PENDING_LABEL_POST_PRE; 865 return 1; 866 } 867 else { 868 output_citation_group(rptr, count, type, fp); 869 rptr += count; 870 rcount -= count; 871 put_string(post_label, fp); 872 state = NORMAL; 873 } 874 break; 875 case PENDING_LABEL_POST_PRE: 876 if (c >= LABEL_MARKER 877 && c < LABEL_MARKER + N_LABEL_TYPES 878 && c - LABEL_MARKER == type) { 879 count += 1; 880 state = PENDING_LABEL; 881 return 1; 882 } 883 else { 884 output_citation_group(rptr, count, type, fp); 885 rptr += count; 886 rcount -= count; 887 put_string(sep_label, fp); 888 state = NORMAL; 889 } 890 break; 891 case PENDING_POST: 892 if (c == PRE_LABEL_MARKER) { 893 put_string(sep_label, fp); 894 state = NORMAL; 895 return 1; 896 } 897 else { 898 put_string(post_label, fp); 899 state = NORMAL; 900 } 901 break; 902 } 903 return 0; 904} 905 906void label_processing_state::process(int c) 907{ 908 if (handle_pending(c)) 909 return; 910 assert(state == NORMAL); 911 switch (c) { 912 case PRE_LABEL_MARKER: 913 put_string(pre_label, fp); 914 state = NORMAL; 915 break; 916 case POST_LABEL_MARKER: 917 state = PENDING_POST; 918 break; 919 case LABEL_MARKER: 920 case LABEL_MARKER + 1: 921 count = 1; 922 state = PENDING_LABEL; 923 type = label_type(c - LABEL_MARKER); 924 break; 925 default: 926 state = NORMAL; 927 putc(c, fp); 928 break; 929 } 930} 931 932extern "C" { 933 934int rcompare(const void *p1, const void *p2) 935{ 936 return compare_reference(**(reference **)p1, **(reference **)p2); 937} 938 939} 940 941void output_references() 942{ 943 assert(accumulate); 944 if (!hash_table_size) { 945 error("nothing to reference (probably `bibliography' before `sort')"); 946 accumulate = 0; 947 nreferences = 0; 948 return; 949 } 950 if (nreferences > 0) { 951 int j = 0; 952 int i; 953 for (i = 0; i < hash_table_size; i++) 954 if (reference_hash_table[i] != 0) 955 reference_hash_table[j++] = reference_hash_table[i]; 956 assert(j == nreferences); 957 for (; j < hash_table_size; j++) 958 reference_hash_table[j] = 0; 959 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); 960 for (i = 0; i < nreferences; i++) 961 reference_hash_table[i]->set_number(i); 962 compute_labels(reference_hash_table, nreferences); 963 } 964 if (outfp != stdout) { 965 rewind(outfp); 966 { 967 label_processing_state state(citation, ncitations, stdout); 968 int c; 969 while ((c = getc(outfp)) != EOF) 970 state.process(c); 971 } 972 ncitations = 0; 973 fclose(outfp); 974 outfp = stdout; 975 } 976 if (nreferences > 0) { 977 fputs(".]<\n", outfp); 978 for (int i = 0; i < nreferences; i++) { 979 if (sort_fields.length() > 0) 980 reference_hash_table[i]->print_sort_key_comment(outfp); 981 if (label_in_reference) { 982 fputs(".ds [F ", outfp); 983 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); 984 if (label.length() > 0 985 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 986 putc('"', outfp); 987 put_string(label, outfp); 988 putc('\n', outfp); 989 } 990 reference_hash_table[i]->output(outfp); 991 delete reference_hash_table[i]; 992 reference_hash_table[i] = 0; 993 } 994 fputs(".]>\n", outfp); 995 nreferences = 0; 996 } 997 clear_labels(); 998} 999 1000static reference *find_reference(const char *query, int query_len) 1001{ 1002 // This is so that error messages look better. 1003 while (query_len > 0 && csspace(query[query_len - 1])) 1004 query_len--; 1005 string str; 1006 for (int i = 0; i < query_len; i++) 1007 str += query[i] == '\n' ? ' ' : query[i]; 1008 str += '\0'; 1009 possibly_load_default_database(); 1010 search_list_iterator iter(&database_list, str.contents()); 1011 reference_id rid; 1012 const char *start; 1013 int len; 1014 if (!iter.next(&start, &len, &rid)) { 1015 error("no matches for `%1'", str.contents()); 1016 return 0; 1017 } 1018 const char *end = start + len; 1019 while (start < end) { 1020 if (*start == '%') 1021 break; 1022 while (start < end && *start++ != '\n') 1023 ; 1024 } 1025 if (start >= end) { 1026 error("found a reference for `%1' but it didn't contain any fields", 1027 str.contents()); 1028 return 0; 1029 } 1030 reference *result = new reference(start, end - start, &rid); 1031 if (iter.next(&start, &len, &rid)) 1032 warning("multiple matches for `%1'", str.contents()); 1033 return result; 1034} 1035 1036static reference *make_reference(const string &str, unsigned *flagsp) 1037{ 1038 const char *start = str.contents(); 1039 const char *end = start + str.length(); 1040 const char *ptr = start; 1041 while (ptr < end) { 1042 if (*ptr == '%') 1043 break; 1044 while (ptr < end && *ptr++ != '\n') 1045 ; 1046 } 1047 *flagsp = 0; 1048 for (; start < ptr; start++) { 1049 if (*start == '#') 1050 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET 1051 | FORCE_LEFT_BRACKET))); 1052 else if (*start == '[') 1053 *flagsp |= FORCE_LEFT_BRACKET; 1054 else if (*start == ']') 1055 *flagsp |= FORCE_RIGHT_BRACKET; 1056 else if (!csspace(*start)) 1057 break; 1058 } 1059 if (start >= end) { 1060 error("empty reference"); 1061 return new reference; 1062 } 1063 reference *database_ref = 0; 1064 if (start < ptr) 1065 database_ref = find_reference(start, ptr - start); 1066 reference *inline_ref = 0; 1067 if (ptr < end) 1068 inline_ref = new reference(ptr, end - ptr); 1069 if (inline_ref) { 1070 if (database_ref) { 1071 database_ref->merge(*inline_ref); 1072 delete inline_ref; 1073 return database_ref; 1074 } 1075 else 1076 return inline_ref; 1077 } 1078 else if (database_ref) 1079 return database_ref; 1080 else 1081 return new reference; 1082} 1083 1084static void do_ref(const string &str) 1085{ 1086 if (accumulate) 1087 (void)store_reference(str); 1088 else { 1089 (void)immediately_handle_reference(str); 1090 immediately_output_references(); 1091 } 1092} 1093 1094static void trim_blanks(string &str) 1095{ 1096 const char *start = str.contents(); 1097 const char *end = start + str.length(); 1098 while (end > start && end[-1] != '\n' && csspace(end[-1])) 1099 --end; 1100 str.set_length(end - start); 1101} 1102 1103void do_bib(const char *filename) 1104{ 1105 FILE *fp; 1106 if (strcmp(filename, "-") == 0) 1107 fp = stdin; 1108 else { 1109 errno = 0; 1110 fp = fopen(filename, "r"); 1111 if (fp == 0) { 1112 error("can't open `%1': %2", filename, strerror(errno)); 1113 return; 1114 } 1115 current_filename = filename; 1116 } 1117 enum { 1118 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT 1119 } state = START; 1120 string body; 1121 for (;;) { 1122 int c = getc(fp); 1123 if (c == EOF) 1124 break; 1125 if (invalid_input_char(c)) { 1126 error("invalid input character code %1", c); 1127 continue; 1128 } 1129 switch (state) { 1130 case START: 1131 if (c == '%') { 1132 body = c; 1133 state = BODY; 1134 } 1135 else if (c != '\n') 1136 state = MIDDLE; 1137 break; 1138 case MIDDLE: 1139 if (c == '\n') 1140 state = START; 1141 break; 1142 case BODY: 1143 body += c; 1144 if (c == '\n') 1145 state = BODY_START; 1146 break; 1147 case BODY_START: 1148 if (c == '\n') { 1149 do_ref(body); 1150 state = START; 1151 } 1152 else if (c == '.') 1153 state = BODY_DOT; 1154 else if (csspace(c)) { 1155 state = BODY_BLANK; 1156 body += c; 1157 } 1158 else { 1159 body += c; 1160 state = BODY; 1161 } 1162 break; 1163 case BODY_BLANK: 1164 if (c == '\n') { 1165 trim_blanks(body); 1166 do_ref(body); 1167 state = START; 1168 } 1169 else if (csspace(c)) 1170 body += c; 1171 else { 1172 body += c; 1173 state = BODY; 1174 } 1175 break; 1176 case BODY_DOT: 1177 if (c == ']') { 1178 do_ref(body); 1179 state = MIDDLE; 1180 } 1181 else { 1182 body += '.'; 1183 body += c; 1184 state = c == '\n' ? BODY_START : BODY; 1185 } 1186 break; 1187 default: 1188 assert(0); 1189 } 1190 if (c == '\n') 1191 current_lineno++; 1192 } 1193 switch (state) { 1194 case START: 1195 case MIDDLE: 1196 break; 1197 case BODY: 1198 body += '\n'; 1199 do_ref(body); 1200 break; 1201 case BODY_DOT: 1202 case BODY_START: 1203 do_ref(body); 1204 break; 1205 case BODY_BLANK: 1206 trim_blanks(body); 1207 do_ref(body); 1208 break; 1209 } 1210 fclose(fp); 1211} 1212 1213// from the Dragon Book 1214 1215unsigned hash_string(const char *s, int len) 1216{ 1217 const char *end = s + len; 1218 unsigned h = 0, g; 1219 while (s < end) { 1220 h <<= 4; 1221 h += *s++; 1222 if ((g = h & 0xf0000000) != 0) { 1223 h ^= g >> 24; 1224 h ^= g; 1225 } 1226 } 1227 return h; 1228} 1229 1230int next_size(int n) 1231{ 1232 static const int table_sizes[] = { 1233 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, 1234 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, 1235 16000057, 32000011, 64000031, 128000003, 0 1236 }; 1237 1238 const int *p; 1239 for (p = table_sizes; *p <= n && *p != 0; p++) 1240 ; 1241 assert(*p != 0); 1242 return *p; 1243} 1244 1245