1114402Sru// -*- C++ -*- 2151497Sru/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004 3151497Sru Free Software Foundation, Inc. 4114402Sru Written by James Clark (jjc@jclark.com) 5114402Sru 6114402SruThis file is part of groff. 7114402Sru 8114402Srugroff is free software; you can redistribute it and/or modify it under 9114402Sruthe terms of the GNU General Public License as published by the Free 10114402SruSoftware Foundation; either version 2, or (at your option) any later 11114402Sruversion. 12114402Sru 13114402Srugroff is distributed in the hope that it will be useful, but WITHOUT ANY 14114402SruWARRANTY; without even the implied warranty of MERCHANTABILITY or 15114402SruFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16114402Srufor more details. 17114402Sru 18114402SruYou should have received a copy of the GNU General Public License along 19114402Sruwith groff; see the file COPYING. If not, write to the Free Software 20151497SruFoundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 21114402Sru 22114402Sru#include "refer.h" 23114402Sru#include "refid.h" 24114402Sru#include "ref.h" 25114402Sru#include "token.h" 26114402Sru#include "search.h" 27114402Sru#include "command.h" 28114402Sru 29114402Sruextern "C" const char *Version_string; 30114402Sru 31114402Sruconst char PRE_LABEL_MARKER = '\013'; 32114402Sruconst char POST_LABEL_MARKER = '\014'; 33114402Sruconst char LABEL_MARKER = '\015'; // label_type is added on 34114402Sru 35114402Sru#define FORCE_LEFT_BRACKET 04 36114402Sru#define FORCE_RIGHT_BRACKET 010 37114402Sru 38114402Srustatic FILE *outfp = stdout; 39114402Sru 40114402Srustring capitalize_fields; 41114402Srustring reverse_fields; 42114402Srustring abbreviate_fields; 43114402Srustring period_before_last_name = ". "; 44114402Srustring period_before_initial = "."; 45114402Srustring period_before_hyphen = ""; 46114402Srustring period_before_other = ". "; 47114402Srustring sort_fields; 48114402Sruint annotation_field = -1; 49114402Srustring annotation_macro; 50114402Srustring discard_fields = "XYZ"; 51114402Srustring pre_label = "\\*([."; 52114402Srustring post_label = "\\*(.]"; 53114402Srustring sep_label = ", "; 54114402Sruint accumulate = 0; 55114402Sruint move_punctuation = 0; 56114402Sruint abbreviate_label_ranges = 0; 57114402Srustring label_range_indicator; 58114402Sruint label_in_text = 1; 59114402Sruint label_in_reference = 1; 60114402Sruint date_as_label = 0; 61114402Sruint sort_adjacent_labels = 0; 62114402Sru// Join exactly two authors with this. 63114402Srustring join_authors_exactly_two = " and "; 64114402Sru// When there are more than two authors join the last two with this. 65114402Srustring join_authors_last_two = ", and "; 66114402Sru// Otherwise join authors with this. 67114402Srustring join_authors_default = ", "; 68114402Srustring separate_label_second_parts = ", "; 69114402Sru// Use this string to represent that there are other authors. 70114402Srustring et_al = " et al"; 71114402Sru// Use et al only if it can replace at least this many authors. 72114402Sruint et_al_min_elide = 2; 73114402Sru// Use et al only if the total number of authors is at least this. 74114402Sruint et_al_min_total = 3; 75114402Sru 76114402Sru 77114402Sruint compatible_flag = 0; 78114402Sru 79114402Sruint short_label_flag = 0; 80114402Sru 81114402Srustatic int recognize_R1_R2 = 1; 82114402Sru 83114402Srusearch_list database_list; 84114402Sruint search_default = 1; 85114402Srustatic int default_database_loaded = 0; 86114402Sru 87114402Srustatic reference **citation = 0; 88114402Srustatic int ncitations = 0; 89114402Srustatic int citation_max = 0; 90114402Sru 91114402Srustatic reference **reference_hash_table = 0; 92114402Srustatic int hash_table_size; 93114402Srustatic int nreferences = 0; 94114402Sru 95114402Srustatic int need_syncing = 0; 96114402Srustring pending_line; 97114402Srustring pending_lf_lines; 98114402Sru 99114402Srustatic void output_pending_line(); 100114402Srustatic unsigned immediately_handle_reference(const string &); 101114402Srustatic void immediately_output_references(); 102114402Srustatic unsigned store_reference(const string &); 103114402Srustatic void divert_to_temporary_file(); 104114402Srustatic reference *make_reference(const string &, unsigned *); 105114402Srustatic void usage(FILE *stream); 106114402Srustatic void do_file(const char *); 107114402Srustatic void split_punct(string &line, string &punct); 108114402Srustatic void output_citation_group(reference **v, int n, label_type, FILE *fp); 109114402Srustatic void possibly_load_default_database(); 110114402Sru 111114402Sruint main(int argc, char **argv) 112114402Sru{ 113114402Sru program_name = argv[0]; 114114402Sru static char stderr_buf[BUFSIZ]; 115114402Sru setbuf(stderr, stderr_buf); 116114402Sru outfp = stdout; 117114402Sru int finished_options = 0; 118114402Sru int bib_flag = 0; 119114402Sru int done_spec = 0; 120114402Sru 121114402Sru for (--argc, ++argv; 122114402Sru !finished_options && argc > 0 && argv[0][0] == '-' 123114402Sru && argv[0][1] != '\0'; 124114402Sru argv++, argc--) { 125114402Sru const char *opt = argv[0] + 1; 126114402Sru while (opt != 0 && *opt != '\0') { 127114402Sru switch (*opt) { 128114402Sru case 'C': 129114402Sru compatible_flag = 1; 130114402Sru opt++; 131114402Sru break; 132114402Sru case 'B': 133114402Sru bib_flag = 1; 134114402Sru label_in_reference = 0; 135114402Sru label_in_text = 0; 136114402Sru ++opt; 137114402Sru if (*opt == '\0') { 138114402Sru annotation_field = 'X'; 139114402Sru annotation_macro = "AP"; 140114402Sru } 141114402Sru else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { 142114402Sru annotation_field = opt[0]; 143114402Sru annotation_macro = opt + 2; 144114402Sru } 145114402Sru opt = 0; 146114402Sru break; 147114402Sru case 'P': 148114402Sru move_punctuation = 1; 149114402Sru opt++; 150114402Sru break; 151114402Sru case 'R': 152114402Sru recognize_R1_R2 = 0; 153114402Sru opt++; 154114402Sru break; 155114402Sru case 'S': 156114402Sru // Not a very useful spec. 157114402Sru set_label_spec("(A.n|Q)', '(D.y|D)"); 158114402Sru done_spec = 1; 159114402Sru pre_label = " ("; 160114402Sru post_label = ")"; 161114402Sru sep_label = "; "; 162114402Sru opt++; 163114402Sru break; 164114402Sru case 'V': 165114402Sru verify_flag = 1; 166114402Sru opt++; 167114402Sru break; 168114402Sru case 'f': 169114402Sru { 170114402Sru const char *num = 0; 171114402Sru if (*++opt == '\0') { 172114402Sru if (argc > 1) { 173114402Sru num = *++argv; 174114402Sru --argc; 175114402Sru } 176114402Sru else { 177114402Sru error("option `f' requires an argument"); 178114402Sru usage(stderr); 179114402Sru exit(1); 180114402Sru } 181114402Sru } 182114402Sru else { 183114402Sru num = opt; 184114402Sru opt = 0; 185114402Sru } 186114402Sru const char *ptr; 187114402Sru for (ptr = num; *ptr; ptr++) 188114402Sru if (!csdigit(*ptr)) { 189114402Sru error("bad character `%1' in argument to -f option", *ptr); 190114402Sru break; 191114402Sru } 192114402Sru if (*ptr == '\0') { 193114402Sru string spec; 194114402Sru spec = '%'; 195114402Sru spec += num; 196114402Sru spec += '\0'; 197114402Sru set_label_spec(spec.contents()); 198114402Sru done_spec = 1; 199114402Sru } 200114402Sru break; 201114402Sru } 202114402Sru case 'b': 203114402Sru label_in_text = 0; 204114402Sru label_in_reference = 0; 205114402Sru opt++; 206114402Sru break; 207114402Sru case 'e': 208114402Sru accumulate = 1; 209114402Sru opt++; 210114402Sru break; 211114402Sru case 'c': 212114402Sru capitalize_fields = ++opt; 213114402Sru opt = 0; 214114402Sru break; 215114402Sru case 'k': 216114402Sru { 217114402Sru char buf[5]; 218114402Sru if (csalpha(*++opt)) 219114402Sru buf[0] = *opt++; 220114402Sru else { 221114402Sru if (*opt != '\0') 222114402Sru error("bad field name `%1'", *opt++); 223114402Sru buf[0] = 'L'; 224114402Sru } 225114402Sru buf[1] = '~'; 226114402Sru buf[2] = '%'; 227114402Sru buf[3] = 'a'; 228114402Sru buf[4] = '\0'; 229114402Sru set_label_spec(buf); 230114402Sru done_spec = 1; 231114402Sru } 232114402Sru break; 233114402Sru case 'a': 234114402Sru { 235114402Sru const char *ptr; 236114402Sru for (ptr = ++opt; *ptr; ptr++) 237114402Sru if (!csdigit(*ptr)) { 238114402Sru error("argument to `a' option not a number"); 239114402Sru break; 240114402Sru } 241114402Sru if (*ptr == '\0') { 242114402Sru reverse_fields = 'A'; 243114402Sru reverse_fields += opt; 244114402Sru } 245114402Sru opt = 0; 246114402Sru } 247114402Sru break; 248114402Sru case 'i': 249114402Sru linear_ignore_fields = ++opt; 250114402Sru opt = 0; 251114402Sru break; 252114402Sru case 'l': 253114402Sru { 254114402Sru char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a 255114402Sru strcpy(buf, "A.n"); 256114402Sru if (*++opt != '\0' && *opt != ',') { 257114402Sru char *ptr; 258114402Sru long n = strtol(opt, &ptr, 10); 259114402Sru if (n == 0 && ptr == opt) { 260114402Sru error("bad integer `%1' in `l' option", opt); 261114402Sru opt = 0; 262114402Sru break; 263114402Sru } 264114402Sru if (n < 0) 265114402Sru n = 0; 266114402Sru opt = ptr; 267114402Sru sprintf(strchr(buf, '\0'), "+%ld", n); 268114402Sru } 269114402Sru strcat(buf, "D.y"); 270114402Sru if (*opt == ',') 271114402Sru opt++; 272114402Sru if (*opt != '\0') { 273114402Sru char *ptr; 274114402Sru long n = strtol(opt, &ptr, 10); 275114402Sru if (n == 0 && ptr == opt) { 276114402Sru error("bad integer `%1' in `l' option", opt); 277114402Sru opt = 0; 278114402Sru break; 279114402Sru } 280114402Sru if (n < 0) 281114402Sru n = 0; 282114402Sru sprintf(strchr(buf, '\0'), "-%ld", n); 283114402Sru opt = ptr; 284114402Sru if (*opt != '\0') 285114402Sru error("argument to `l' option not of form `m,n'"); 286114402Sru } 287114402Sru strcat(buf, "%a"); 288114402Sru if (!set_label_spec(buf)) 289114402Sru assert(0); 290114402Sru done_spec = 1; 291114402Sru } 292114402Sru break; 293114402Sru case 'n': 294114402Sru search_default = 0; 295114402Sru opt++; 296114402Sru break; 297114402Sru case 'p': 298114402Sru { 299114402Sru const char *filename = 0; 300114402Sru if (*++opt == '\0') { 301114402Sru if (argc > 1) { 302114402Sru filename = *++argv; 303114402Sru argc--; 304114402Sru } 305114402Sru else { 306114402Sru error("option `p' requires an argument"); 307114402Sru usage(stderr); 308114402Sru exit(1); 309114402Sru } 310114402Sru } 311114402Sru else { 312114402Sru filename = opt; 313114402Sru opt = 0; 314114402Sru } 315114402Sru database_list.add_file(filename); 316114402Sru } 317114402Sru break; 318114402Sru case 's': 319114402Sru if (*++opt == '\0') 320114402Sru sort_fields = "AD"; 321114402Sru else { 322114402Sru sort_fields = opt; 323114402Sru opt = 0; 324114402Sru } 325114402Sru accumulate = 1; 326114402Sru break; 327114402Sru case 't': 328114402Sru { 329114402Sru char *ptr; 330114402Sru long n = strtol(opt, &ptr, 10); 331114402Sru if (n == 0 && ptr == opt) { 332114402Sru error("bad integer `%1' in `t' option", opt); 333114402Sru opt = 0; 334114402Sru break; 335114402Sru } 336114402Sru if (n < 1) 337114402Sru n = 1; 338114402Sru linear_truncate_len = int(n); 339114402Sru opt = ptr; 340114402Sru break; 341114402Sru } 342114402Sru case '-': 343114402Sru if (opt[1] == '\0') { 344114402Sru finished_options = 1; 345114402Sru opt++; 346114402Sru break; 347114402Sru } 348114402Sru if (strcmp(opt,"-version")==0) { 349114402Sru case 'v': 350114402Sru printf("GNU refer (groff) version %s\n", Version_string); 351114402Sru exit(0); 352114402Sru break; 353114402Sru } 354114402Sru if (strcmp(opt,"-help")==0) { 355114402Sru usage(stdout); 356114402Sru exit(0); 357114402Sru break; 358114402Sru } 359114402Sru // fall through 360114402Sru default: 361114402Sru error("unrecognized option `%1'", *opt); 362114402Sru usage(stderr); 363114402Sru exit(1); 364114402Sru break; 365114402Sru } 366114402Sru } 367114402Sru } 368114402Sru if (!done_spec) 369114402Sru set_label_spec("%1"); 370114402Sru if (argc <= 0) { 371114402Sru if (bib_flag) 372114402Sru do_bib("-"); 373114402Sru else 374114402Sru do_file("-"); 375114402Sru } 376114402Sru else { 377114402Sru for (int i = 0; i < argc; i++) { 378114402Sru if (bib_flag) 379114402Sru do_bib(argv[i]); 380114402Sru else 381114402Sru do_file(argv[i]); 382114402Sru } 383114402Sru } 384114402Sru if (accumulate) 385114402Sru output_references(); 386114402Sru if (fflush(stdout) < 0) 387114402Sru fatal("output error"); 388114402Sru return 0; 389114402Sru} 390114402Sru 391114402Srustatic void usage(FILE *stream) 392114402Sru{ 393114402Sru fprintf(stream, 394114402Sru"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" 395114402Sru" [-sXYZ] [-tN] [-BL.M] [files ...]\n", 396114402Sru program_name); 397114402Sru} 398114402Sru 399114402Srustatic void possibly_load_default_database() 400114402Sru{ 401114402Sru if (search_default && !default_database_loaded) { 402114402Sru char *filename = getenv("REFER"); 403114402Sru if (filename) 404114402Sru database_list.add_file(filename); 405114402Sru else 406114402Sru database_list.add_file(DEFAULT_INDEX, 1); 407114402Sru default_database_loaded = 1; 408114402Sru } 409114402Sru} 410114402Sru 411114402Srustatic int is_list(const string &str) 412114402Sru{ 413114402Sru const char *start = str.contents(); 414114402Sru const char *end = start + str.length(); 415114402Sru while (end > start && csspace(end[-1])) 416114402Sru end--; 417114402Sru while (start < end && csspace(*start)) 418114402Sru start++; 419114402Sru return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; 420114402Sru} 421114402Sru 422114402Srustatic void do_file(const char *filename) 423114402Sru{ 424114402Sru FILE *fp; 425114402Sru if (strcmp(filename, "-") == 0) { 426114402Sru fp = stdin; 427114402Sru } 428114402Sru else { 429114402Sru errno = 0; 430114402Sru fp = fopen(filename, "r"); 431114402Sru if (fp == 0) { 432114402Sru error("can't open `%1': %2", filename, strerror(errno)); 433114402Sru return; 434114402Sru } 435114402Sru } 436114402Sru current_filename = filename; 437114402Sru fprintf(outfp, ".lf 1 %s\n", filename); 438114402Sru string line; 439114402Sru current_lineno = 0; 440114402Sru for (;;) { 441114402Sru line.clear(); 442114402Sru for (;;) { 443114402Sru int c = getc(fp); 444114402Sru if (c == EOF) { 445114402Sru if (line.length() > 0) 446114402Sru line += '\n'; 447114402Sru break; 448114402Sru } 449114402Sru if (invalid_input_char(c)) 450114402Sru error("invalid input character code %1", c); 451114402Sru else { 452114402Sru line += c; 453114402Sru if (c == '\n') 454114402Sru break; 455114402Sru } 456114402Sru } 457114402Sru int len = line.length(); 458114402Sru if (len == 0) 459114402Sru break; 460114402Sru current_lineno++; 461114402Sru if (len >= 2 && line[0] == '.' && line[1] == '[') { 462114402Sru int start_lineno = current_lineno; 463114402Sru int start_of_line = 1; 464114402Sru string str; 465114402Sru string post; 466114402Sru string pre(line.contents() + 2, line.length() - 3); 467114402Sru for (;;) { 468114402Sru int c = getc(fp); 469114402Sru if (c == EOF) { 470114402Sru error_with_file_and_line(current_filename, start_lineno, 471114402Sru "missing `.]' line"); 472114402Sru break; 473114402Sru } 474114402Sru if (start_of_line) 475114402Sru current_lineno++; 476114402Sru if (start_of_line && c == '.') { 477114402Sru int d = getc(fp); 478114402Sru if (d == ']') { 479114402Sru while ((d = getc(fp)) != '\n' && d != EOF) { 480114402Sru if (invalid_input_char(d)) 481114402Sru error("invalid input character code %1", d); 482114402Sru else 483114402Sru post += d; 484114402Sru } 485114402Sru break; 486114402Sru } 487114402Sru if (d != EOF) 488114402Sru ungetc(d, fp); 489114402Sru } 490114402Sru if (invalid_input_char(c)) 491114402Sru error("invalid input character code %1", c); 492114402Sru else 493114402Sru str += c; 494114402Sru start_of_line = (c == '\n'); 495114402Sru } 496114402Sru if (is_list(str)) { 497114402Sru output_pending_line(); 498114402Sru if (accumulate) 499114402Sru output_references(); 500114402Sru else 501114402Sru error("found `$LIST$' but not accumulating references"); 502114402Sru } 503114402Sru else { 504114402Sru unsigned flags = (accumulate 505114402Sru ? store_reference(str) 506114402Sru : immediately_handle_reference(str)); 507114402Sru if (label_in_text) { 508114402Sru if (accumulate && outfp == stdout) 509114402Sru divert_to_temporary_file(); 510114402Sru if (pending_line.length() == 0) { 511114402Sru warning("can't attach citation to previous line"); 512114402Sru } 513114402Sru else 514114402Sru pending_line.set_length(pending_line.length() - 1); 515114402Sru string punct; 516114402Sru if (move_punctuation) 517114402Sru split_punct(pending_line, punct); 518114402Sru int have_text = pre.length() > 0 || post.length() > 0; 519114402Sru label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET 520114402Sru |FORCE_RIGHT_BRACKET)); 521114402Sru if ((flags & FORCE_LEFT_BRACKET) || !have_text) 522114402Sru pending_line += PRE_LABEL_MARKER; 523114402Sru pending_line += pre; 524114402Sru char lm = LABEL_MARKER + (int)lt; 525114402Sru pending_line += lm; 526114402Sru pending_line += post; 527114402Sru if ((flags & FORCE_RIGHT_BRACKET) || !have_text) 528114402Sru pending_line += POST_LABEL_MARKER; 529114402Sru pending_line += punct; 530114402Sru pending_line += '\n'; 531114402Sru } 532114402Sru } 533114402Sru need_syncing = 1; 534114402Sru } 535114402Sru else if (len >= 4 536114402Sru && line[0] == '.' && line[1] == 'l' && line[2] == 'f' 537114402Sru && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 538114402Sru pending_lf_lines += line; 539114402Sru line += '\0'; 540114402Sru if (interpret_lf_args(line.contents() + 3)) 541114402Sru current_lineno--; 542114402Sru } 543114402Sru else if (recognize_R1_R2 544114402Sru && len >= 4 545114402Sru && line[0] == '.' && line[1] == 'R' && line[2] == '1' 546114402Sru && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { 547114402Sru line.clear(); 548114402Sru int start_of_line = 1; 549114402Sru int start_lineno = current_lineno; 550114402Sru for (;;) { 551114402Sru int c = getc(fp); 552114402Sru if (c != EOF && start_of_line) 553114402Sru current_lineno++; 554114402Sru if (start_of_line && c == '.') { 555114402Sru c = getc(fp); 556114402Sru if (c == 'R') { 557114402Sru c = getc(fp); 558114402Sru if (c == '2') { 559114402Sru c = getc(fp); 560114402Sru if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { 561114402Sru while (c != EOF && c != '\n') 562114402Sru c = getc(fp); 563114402Sru break; 564114402Sru } 565114402Sru else { 566114402Sru line += '.'; 567114402Sru line += 'R'; 568114402Sru line += '2'; 569114402Sru } 570114402Sru } 571114402Sru else { 572114402Sru line += '.'; 573114402Sru line += 'R'; 574114402Sru } 575114402Sru } 576114402Sru else 577114402Sru line += '.'; 578114402Sru } 579114402Sru if (c == EOF) { 580114402Sru error_with_file_and_line(current_filename, start_lineno, 581114402Sru "missing `.R2' line"); 582114402Sru break; 583114402Sru } 584114402Sru if (invalid_input_char(c)) 585114402Sru error("invalid input character code %1", int(c)); 586114402Sru else { 587114402Sru line += c; 588114402Sru start_of_line = c == '\n'; 589114402Sru } 590114402Sru } 591114402Sru output_pending_line(); 592114402Sru if (accumulate) 593114402Sru output_references(); 594114402Sru else 595114402Sru nreferences = 0; 596114402Sru process_commands(line, current_filename, start_lineno + 1); 597114402Sru need_syncing = 1; 598114402Sru } 599114402Sru else { 600114402Sru output_pending_line(); 601114402Sru pending_line = line; 602114402Sru } 603114402Sru } 604114402Sru need_syncing = 0; 605114402Sru output_pending_line(); 606114402Sru if (fp != stdin) 607114402Sru fclose(fp); 608114402Sru} 609114402Sru 610114402Sruclass label_processing_state { 611114402Sru enum { 612114402Sru NORMAL, 613114402Sru PENDING_LABEL, 614114402Sru PENDING_LABEL_POST, 615114402Sru PENDING_LABEL_POST_PRE, 616114402Sru PENDING_POST 617114402Sru } state; 618114402Sru label_type type; // type of pending labels 619114402Sru int count; // number of pending labels 620114402Sru reference **rptr; // pointer to next reference 621114402Sru int rcount; // number of references left 622114402Sru FILE *fp; 623114402Sru int handle_pending(int c); 624114402Srupublic: 625114402Sru label_processing_state(reference **, int, FILE *); 626114402Sru ~label_processing_state(); 627114402Sru void process(int c); 628114402Sru}; 629114402Sru 630114402Srustatic void output_pending_line() 631114402Sru{ 632114402Sru if (label_in_text && !accumulate && ncitations > 0) { 633114402Sru label_processing_state state(citation, ncitations, outfp); 634114402Sru int len = pending_line.length(); 635114402Sru for (int i = 0; i < len; i++) 636114402Sru state.process((unsigned char)(pending_line[i])); 637114402Sru } 638114402Sru else 639114402Sru put_string(pending_line, outfp); 640114402Sru pending_line.clear(); 641114402Sru if (pending_lf_lines.length() > 0) { 642114402Sru put_string(pending_lf_lines, outfp); 643114402Sru pending_lf_lines.clear(); 644114402Sru } 645114402Sru if (!accumulate) 646114402Sru immediately_output_references(); 647114402Sru if (need_syncing) { 648114402Sru fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); 649114402Sru need_syncing = 0; 650114402Sru } 651114402Sru} 652114402Sru 653114402Srustatic void split_punct(string &line, string &punct) 654114402Sru{ 655114402Sru const char *start = line.contents(); 656114402Sru const char *end = start + line.length(); 657114402Sru const char *ptr = start; 658114402Sru const char *last_token_start = 0; 659114402Sru for (;;) { 660114402Sru if (ptr >= end) 661114402Sru break; 662114402Sru last_token_start = ptr; 663114402Sru if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER 664114402Sru || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) 665114402Sru ptr++; 666114402Sru else if (!get_token(&ptr, end)) 667114402Sru break; 668114402Sru } 669114402Sru if (last_token_start) { 670114402Sru const token_info *ti = lookup_token(last_token_start, end); 671114402Sru if (ti->is_punct()) { 672114402Sru punct.append(last_token_start, end - last_token_start); 673114402Sru line.set_length(last_token_start - start); 674114402Sru } 675114402Sru } 676114402Sru} 677114402Sru 678114402Srustatic void divert_to_temporary_file() 679114402Sru{ 680114402Sru outfp = xtmpfile(); 681114402Sru} 682114402Sru 683114402Srustatic void store_citation(reference *ref) 684114402Sru{ 685114402Sru if (ncitations >= citation_max) { 686114402Sru if (citation == 0) 687114402Sru citation = new reference*[citation_max = 100]; 688114402Sru else { 689114402Sru reference **old_citation = citation; 690114402Sru citation_max *= 2; 691114402Sru citation = new reference *[citation_max]; 692114402Sru memcpy(citation, old_citation, ncitations*sizeof(reference *)); 693114402Sru a_delete old_citation; 694114402Sru } 695114402Sru } 696114402Sru citation[ncitations++] = ref; 697114402Sru} 698114402Sru 699114402Srustatic unsigned store_reference(const string &str) 700114402Sru{ 701114402Sru if (reference_hash_table == 0) { 702114402Sru reference_hash_table = new reference *[17]; 703114402Sru hash_table_size = 17; 704114402Sru for (int i = 0; i < hash_table_size; i++) 705114402Sru reference_hash_table[i] = 0; 706114402Sru } 707114402Sru unsigned flags; 708114402Sru reference *ref = make_reference(str, &flags); 709114402Sru ref->compute_hash_code(); 710114402Sru unsigned h = ref->hash(); 711114402Sru reference **ptr; 712114402Sru for (ptr = reference_hash_table + (h % hash_table_size); 713114402Sru *ptr != 0; 714114402Sru ((ptr == reference_hash_table) 715114402Sru ? (ptr = reference_hash_table + hash_table_size - 1) 716114402Sru : --ptr)) 717114402Sru if (same_reference(**ptr, *ref)) 718114402Sru break; 719114402Sru if (*ptr != 0) { 720114402Sru if (ref->is_merged()) 721114402Sru warning("fields ignored because reference already used"); 722114402Sru delete ref; 723114402Sru ref = *ptr; 724114402Sru } 725114402Sru else { 726114402Sru *ptr = ref; 727114402Sru ref->set_number(nreferences); 728114402Sru nreferences++; 729114402Sru ref->pre_compute_label(); 730114402Sru ref->compute_sort_key(); 731114402Sru if (nreferences*2 >= hash_table_size) { 732114402Sru // Rehash it. 733114402Sru reference **old_table = reference_hash_table; 734114402Sru int old_size = hash_table_size; 735114402Sru hash_table_size = next_size(hash_table_size); 736114402Sru reference_hash_table = new reference*[hash_table_size]; 737114402Sru int i; 738114402Sru for (i = 0; i < hash_table_size; i++) 739114402Sru reference_hash_table[i] = 0; 740114402Sru for (i = 0; i < old_size; i++) 741114402Sru if (old_table[i]) { 742114402Sru reference **p; 743114402Sru for (p = (reference_hash_table 744114402Sru + (old_table[i]->hash() % hash_table_size)); 745114402Sru *p; 746114402Sru ((p == reference_hash_table) 747114402Sru ? (p = reference_hash_table + hash_table_size - 1) 748114402Sru : --p)) 749114402Sru ; 750114402Sru *p = old_table[i]; 751114402Sru } 752114402Sru a_delete old_table; 753114402Sru } 754114402Sru } 755114402Sru if (label_in_text) 756114402Sru store_citation(ref); 757114402Sru return flags; 758114402Sru} 759114402Sru 760114402Sruunsigned immediately_handle_reference(const string &str) 761114402Sru{ 762114402Sru unsigned flags; 763114402Sru reference *ref = make_reference(str, &flags); 764114402Sru ref->set_number(nreferences); 765114402Sru if (label_in_text || label_in_reference) { 766114402Sru ref->pre_compute_label(); 767114402Sru ref->immediate_compute_label(); 768114402Sru } 769114402Sru nreferences++; 770114402Sru store_citation(ref); 771114402Sru return flags; 772114402Sru} 773114402Sru 774114402Srustatic void immediately_output_references() 775114402Sru{ 776114402Sru for (int i = 0; i < ncitations; i++) { 777114402Sru reference *ref = citation[i]; 778114402Sru if (label_in_reference) { 779114402Sru fputs(".ds [F ", outfp); 780114402Sru const string &label = ref->get_label(NORMAL_LABEL); 781114402Sru if (label.length() > 0 782114402Sru && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 783114402Sru putc('"', outfp); 784114402Sru put_string(label, outfp); 785114402Sru putc('\n', outfp); 786114402Sru } 787114402Sru ref->output(outfp); 788114402Sru delete ref; 789114402Sru } 790114402Sru ncitations = 0; 791114402Sru} 792114402Sru 793114402Srustatic void output_citation_group(reference **v, int n, label_type type, 794114402Sru FILE *fp) 795114402Sru{ 796114402Sru if (sort_adjacent_labels) { 797114402Sru // Do an insertion sort. Usually n will be very small. 798114402Sru for (int i = 1; i < n; i++) { 799114402Sru int num = v[i]->get_number(); 800114402Sru reference *temp = v[i]; 801114402Sru int j; 802114402Sru for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) 803114402Sru v[j + 1] = v[j]; 804114402Sru v[j + 1] = temp; 805114402Sru } 806114402Sru } 807114402Sru // This messes up if !accumulate. 808114402Sru if (accumulate && n > 1) { 809114402Sru // remove duplicates 810114402Sru int j = 1; 811114402Sru for (int i = 1; i < n; i++) 812114402Sru if (v[i]->get_label(type) != v[i - 1]->get_label(type)) 813114402Sru v[j++] = v[i]; 814114402Sru n = j; 815114402Sru } 816114402Sru string merged_label; 817114402Sru for (int i = 0; i < n; i++) { 818114402Sru int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); 819114402Sru if (nmerged > 0) { 820114402Sru put_string(merged_label, fp); 821114402Sru i += nmerged; 822114402Sru } 823114402Sru else 824114402Sru put_string(v[i]->get_label(type), fp); 825114402Sru if (i < n - 1) 826114402Sru put_string(sep_label, fp); 827114402Sru } 828114402Sru} 829114402Sru 830114402Sru 831114402Srulabel_processing_state::label_processing_state(reference **p, int n, FILE *f) 832114402Sru: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) 833114402Sru{ 834114402Sru} 835114402Sru 836114402Srulabel_processing_state::~label_processing_state() 837114402Sru{ 838114402Sru int handled = handle_pending(EOF); 839114402Sru assert(!handled); 840114402Sru assert(rcount == 0); 841114402Sru} 842114402Sru 843114402Sruint label_processing_state::handle_pending(int c) 844114402Sru{ 845114402Sru switch (state) { 846114402Sru case NORMAL: 847114402Sru break; 848114402Sru case PENDING_LABEL: 849114402Sru if (c == POST_LABEL_MARKER) { 850114402Sru state = PENDING_LABEL_POST; 851114402Sru return 1; 852114402Sru } 853114402Sru else { 854114402Sru output_citation_group(rptr, count, type, fp); 855114402Sru rptr += count ; 856114402Sru rcount -= count; 857114402Sru state = NORMAL; 858114402Sru } 859114402Sru break; 860114402Sru case PENDING_LABEL_POST: 861114402Sru if (c == PRE_LABEL_MARKER) { 862114402Sru state = PENDING_LABEL_POST_PRE; 863114402Sru return 1; 864114402Sru } 865114402Sru else { 866114402Sru output_citation_group(rptr, count, type, fp); 867114402Sru rptr += count; 868114402Sru rcount -= count; 869114402Sru put_string(post_label, fp); 870114402Sru state = NORMAL; 871114402Sru } 872114402Sru break; 873114402Sru case PENDING_LABEL_POST_PRE: 874114402Sru if (c >= LABEL_MARKER 875114402Sru && c < LABEL_MARKER + N_LABEL_TYPES 876114402Sru && c - LABEL_MARKER == type) { 877114402Sru count += 1; 878114402Sru state = PENDING_LABEL; 879114402Sru return 1; 880114402Sru } 881114402Sru else { 882114402Sru output_citation_group(rptr, count, type, fp); 883114402Sru rptr += count; 884114402Sru rcount -= count; 885114402Sru put_string(sep_label, fp); 886114402Sru state = NORMAL; 887114402Sru } 888114402Sru break; 889114402Sru case PENDING_POST: 890114402Sru if (c == PRE_LABEL_MARKER) { 891114402Sru put_string(sep_label, fp); 892114402Sru state = NORMAL; 893114402Sru return 1; 894114402Sru } 895114402Sru else { 896114402Sru put_string(post_label, fp); 897114402Sru state = NORMAL; 898114402Sru } 899114402Sru break; 900114402Sru } 901114402Sru return 0; 902114402Sru} 903114402Sru 904114402Sruvoid label_processing_state::process(int c) 905114402Sru{ 906114402Sru if (handle_pending(c)) 907114402Sru return; 908114402Sru assert(state == NORMAL); 909114402Sru switch (c) { 910114402Sru case PRE_LABEL_MARKER: 911114402Sru put_string(pre_label, fp); 912114402Sru state = NORMAL; 913114402Sru break; 914114402Sru case POST_LABEL_MARKER: 915114402Sru state = PENDING_POST; 916114402Sru break; 917114402Sru case LABEL_MARKER: 918114402Sru case LABEL_MARKER + 1: 919114402Sru count = 1; 920114402Sru state = PENDING_LABEL; 921114402Sru type = label_type(c - LABEL_MARKER); 922114402Sru break; 923114402Sru default: 924114402Sru state = NORMAL; 925114402Sru putc(c, fp); 926114402Sru break; 927114402Sru } 928114402Sru} 929114402Sru 930114402Sruextern "C" { 931114402Sru 932114402Sruint rcompare(const void *p1, const void *p2) 933114402Sru{ 934114402Sru return compare_reference(**(reference **)p1, **(reference **)p2); 935114402Sru} 936114402Sru 937114402Sru} 938114402Sru 939114402Sruvoid output_references() 940114402Sru{ 941114402Sru assert(accumulate); 942151497Sru if (!hash_table_size) { 943151497Sru error("nothing to reference (probably `bibliography' before `sort')"); 944151497Sru accumulate = 0; 945151497Sru nreferences = 0; 946151497Sru return; 947151497Sru } 948114402Sru if (nreferences > 0) { 949114402Sru int j = 0; 950114402Sru int i; 951114402Sru for (i = 0; i < hash_table_size; i++) 952114402Sru if (reference_hash_table[i] != 0) 953114402Sru reference_hash_table[j++] = reference_hash_table[i]; 954114402Sru assert(j == nreferences); 955114402Sru for (; j < hash_table_size; j++) 956114402Sru reference_hash_table[j] = 0; 957114402Sru qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); 958114402Sru for (i = 0; i < nreferences; i++) 959114402Sru reference_hash_table[i]->set_number(i); 960114402Sru compute_labels(reference_hash_table, nreferences); 961114402Sru } 962114402Sru if (outfp != stdout) { 963114402Sru rewind(outfp); 964114402Sru { 965114402Sru label_processing_state state(citation, ncitations, stdout); 966114402Sru int c; 967114402Sru while ((c = getc(outfp)) != EOF) 968114402Sru state.process(c); 969114402Sru } 970114402Sru ncitations = 0; 971114402Sru fclose(outfp); 972114402Sru outfp = stdout; 973114402Sru } 974114402Sru if (nreferences > 0) { 975114402Sru fputs(".]<\n", outfp); 976114402Sru for (int i = 0; i < nreferences; i++) { 977114402Sru if (sort_fields.length() > 0) 978114402Sru reference_hash_table[i]->print_sort_key_comment(outfp); 979114402Sru if (label_in_reference) { 980114402Sru fputs(".ds [F ", outfp); 981114402Sru const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); 982114402Sru if (label.length() > 0 983114402Sru && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) 984114402Sru putc('"', outfp); 985114402Sru put_string(label, outfp); 986114402Sru putc('\n', outfp); 987114402Sru } 988114402Sru reference_hash_table[i]->output(outfp); 989114402Sru delete reference_hash_table[i]; 990114402Sru reference_hash_table[i] = 0; 991114402Sru } 992114402Sru fputs(".]>\n", outfp); 993114402Sru nreferences = 0; 994114402Sru } 995114402Sru clear_labels(); 996114402Sru} 997114402Sru 998114402Srustatic reference *find_reference(const char *query, int query_len) 999114402Sru{ 1000114402Sru // This is so that error messages look better. 1001114402Sru while (query_len > 0 && csspace(query[query_len - 1])) 1002114402Sru query_len--; 1003114402Sru string str; 1004114402Sru for (int i = 0; i < query_len; i++) 1005114402Sru str += query[i] == '\n' ? ' ' : query[i]; 1006114402Sru str += '\0'; 1007114402Sru possibly_load_default_database(); 1008114402Sru search_list_iterator iter(&database_list, str.contents()); 1009114402Sru reference_id rid; 1010114402Sru const char *start; 1011114402Sru int len; 1012114402Sru if (!iter.next(&start, &len, &rid)) { 1013114402Sru error("no matches for `%1'", str.contents()); 1014114402Sru return 0; 1015114402Sru } 1016114402Sru const char *end = start + len; 1017114402Sru while (start < end) { 1018114402Sru if (*start == '%') 1019114402Sru break; 1020114402Sru while (start < end && *start++ != '\n') 1021114402Sru ; 1022114402Sru } 1023114402Sru if (start >= end) { 1024114402Sru error("found a reference for `%1' but it didn't contain any fields", 1025114402Sru str.contents()); 1026114402Sru return 0; 1027114402Sru } 1028114402Sru reference *result = new reference(start, end - start, &rid); 1029114402Sru if (iter.next(&start, &len, &rid)) 1030114402Sru warning("multiple matches for `%1'", str.contents()); 1031114402Sru return result; 1032114402Sru} 1033114402Sru 1034114402Srustatic reference *make_reference(const string &str, unsigned *flagsp) 1035114402Sru{ 1036114402Sru const char *start = str.contents(); 1037114402Sru const char *end = start + str.length(); 1038114402Sru const char *ptr = start; 1039114402Sru while (ptr < end) { 1040114402Sru if (*ptr == '%') 1041114402Sru break; 1042114402Sru while (ptr < end && *ptr++ != '\n') 1043114402Sru ; 1044114402Sru } 1045114402Sru *flagsp = 0; 1046114402Sru for (; start < ptr; start++) { 1047114402Sru if (*start == '#') 1048114402Sru *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET 1049114402Sru | FORCE_LEFT_BRACKET))); 1050114402Sru else if (*start == '[') 1051114402Sru *flagsp |= FORCE_LEFT_BRACKET; 1052114402Sru else if (*start == ']') 1053114402Sru *flagsp |= FORCE_RIGHT_BRACKET; 1054114402Sru else if (!csspace(*start)) 1055114402Sru break; 1056114402Sru } 1057114402Sru if (start >= end) { 1058114402Sru error("empty reference"); 1059114402Sru return new reference; 1060114402Sru } 1061114402Sru reference *database_ref = 0; 1062114402Sru if (start < ptr) 1063114402Sru database_ref = find_reference(start, ptr - start); 1064114402Sru reference *inline_ref = 0; 1065114402Sru if (ptr < end) 1066114402Sru inline_ref = new reference(ptr, end - ptr); 1067114402Sru if (inline_ref) { 1068114402Sru if (database_ref) { 1069114402Sru database_ref->merge(*inline_ref); 1070114402Sru delete inline_ref; 1071114402Sru return database_ref; 1072114402Sru } 1073114402Sru else 1074114402Sru return inline_ref; 1075114402Sru } 1076114402Sru else if (database_ref) 1077114402Sru return database_ref; 1078114402Sru else 1079114402Sru return new reference; 1080114402Sru} 1081114402Sru 1082114402Srustatic void do_ref(const string &str) 1083114402Sru{ 1084114402Sru if (accumulate) 1085114402Sru (void)store_reference(str); 1086114402Sru else { 1087114402Sru (void)immediately_handle_reference(str); 1088114402Sru immediately_output_references(); 1089114402Sru } 1090114402Sru} 1091114402Sru 1092114402Srustatic void trim_blanks(string &str) 1093114402Sru{ 1094114402Sru const char *start = str.contents(); 1095114402Sru const char *end = start + str.length(); 1096114402Sru while (end > start && end[-1] != '\n' && csspace(end[-1])) 1097114402Sru --end; 1098114402Sru str.set_length(end - start); 1099114402Sru} 1100114402Sru 1101114402Sruvoid do_bib(const char *filename) 1102114402Sru{ 1103114402Sru FILE *fp; 1104114402Sru if (strcmp(filename, "-") == 0) 1105114402Sru fp = stdin; 1106114402Sru else { 1107114402Sru errno = 0; 1108114402Sru fp = fopen(filename, "r"); 1109114402Sru if (fp == 0) { 1110114402Sru error("can't open `%1': %2", filename, strerror(errno)); 1111114402Sru return; 1112114402Sru } 1113114402Sru current_filename = filename; 1114114402Sru } 1115114402Sru enum { 1116114402Sru START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT 1117114402Sru } state = START; 1118114402Sru string body; 1119114402Sru for (;;) { 1120114402Sru int c = getc(fp); 1121114402Sru if (c == EOF) 1122114402Sru break; 1123114402Sru if (invalid_input_char(c)) { 1124114402Sru error("invalid input character code %1", c); 1125114402Sru continue; 1126114402Sru } 1127114402Sru switch (state) { 1128114402Sru case START: 1129114402Sru if (c == '%') { 1130114402Sru body = c; 1131114402Sru state = BODY; 1132114402Sru } 1133114402Sru else if (c != '\n') 1134114402Sru state = MIDDLE; 1135114402Sru break; 1136114402Sru case MIDDLE: 1137114402Sru if (c == '\n') 1138114402Sru state = START; 1139114402Sru break; 1140114402Sru case BODY: 1141114402Sru body += c; 1142114402Sru if (c == '\n') 1143114402Sru state = BODY_START; 1144114402Sru break; 1145114402Sru case BODY_START: 1146114402Sru if (c == '\n') { 1147114402Sru do_ref(body); 1148114402Sru state = START; 1149114402Sru } 1150114402Sru else if (c == '.') 1151114402Sru state = BODY_DOT; 1152114402Sru else if (csspace(c)) { 1153114402Sru state = BODY_BLANK; 1154114402Sru body += c; 1155114402Sru } 1156114402Sru else { 1157114402Sru body += c; 1158114402Sru state = BODY; 1159114402Sru } 1160114402Sru break; 1161114402Sru case BODY_BLANK: 1162114402Sru if (c == '\n') { 1163114402Sru trim_blanks(body); 1164114402Sru do_ref(body); 1165114402Sru state = START; 1166114402Sru } 1167114402Sru else if (csspace(c)) 1168114402Sru body += c; 1169114402Sru else { 1170114402Sru body += c; 1171114402Sru state = BODY; 1172114402Sru } 1173114402Sru break; 1174114402Sru case BODY_DOT: 1175114402Sru if (c == ']') { 1176114402Sru do_ref(body); 1177114402Sru state = MIDDLE; 1178114402Sru } 1179114402Sru else { 1180114402Sru body += '.'; 1181114402Sru body += c; 1182114402Sru state = c == '\n' ? BODY_START : BODY; 1183114402Sru } 1184114402Sru break; 1185114402Sru default: 1186114402Sru assert(0); 1187114402Sru } 1188114402Sru if (c == '\n') 1189114402Sru current_lineno++; 1190114402Sru } 1191114402Sru switch (state) { 1192114402Sru case START: 1193114402Sru case MIDDLE: 1194114402Sru break; 1195114402Sru case BODY: 1196114402Sru body += '\n'; 1197114402Sru do_ref(body); 1198114402Sru break; 1199114402Sru case BODY_DOT: 1200114402Sru case BODY_START: 1201114402Sru do_ref(body); 1202114402Sru break; 1203114402Sru case BODY_BLANK: 1204114402Sru trim_blanks(body); 1205114402Sru do_ref(body); 1206114402Sru break; 1207114402Sru } 1208114402Sru fclose(fp); 1209114402Sru} 1210114402Sru 1211114402Sru// from the Dragon Book 1212114402Sru 1213114402Sruunsigned hash_string(const char *s, int len) 1214114402Sru{ 1215114402Sru const char *end = s + len; 1216114402Sru unsigned h = 0, g; 1217114402Sru while (s < end) { 1218114402Sru h <<= 4; 1219114402Sru h += *s++; 1220114402Sru if ((g = h & 0xf0000000) != 0) { 1221114402Sru h ^= g >> 24; 1222114402Sru h ^= g; 1223114402Sru } 1224114402Sru } 1225114402Sru return h; 1226114402Sru} 1227114402Sru 1228114402Sruint next_size(int n) 1229114402Sru{ 1230114402Sru static const int table_sizes[] = { 1231114402Sru 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, 1232114402Sru 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, 1233114402Sru 16000057, 32000011, 64000031, 128000003, 0 1234114402Sru }; 1235114402Sru 1236114402Sru const int *p; 1237114402Sru for (p = table_sizes; *p <= n && *p != 0; p++) 1238114402Sru ; 1239114402Sru assert(*p != 0); 1240114402Sru return *p; 1241114402Sru} 1242114402Sru 1243