1151912Sphk/*	$NetBSD$	*/
2151912Sphk
3209440Smav// -*- C++ -*-
4151912Sphk/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
5151912Sphk   Free Software Foundation, Inc.
6151912Sphk     Written by James Clark (jjc@jclark.com)
7151912Sphk
8151912SphkThis file is part of groff.
9151912Sphk
10151912Sphkgroff is free software; you can redistribute it and/or modify it under
11151912Sphkthe terms of the GNU General Public License as published by the Free
12151912SphkSoftware Foundation; either version 2, or (at your option) any later
13151912Sphkversion.
14151912Sphk
15151912Sphkgroff is distributed in the hope that it will be useful, but WITHOUT ANY
16151912SphkWARRANTY; without even the implied warranty of MERCHANTABILITY or
17151912SphkFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18151912Sphkfor more details.
19151912Sphk
20151912SphkYou should have received a copy of the GNU General Public License along
21151912Sphkwith groff; see the file COPYING.  If not, write to the Free Software
22151912SphkFoundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23151912Sphk
24151912Sphk#include "refer.h"
25151912Sphk#include "refid.h"
26151912Sphk#include "ref.h"
27151912Sphk#include "token.h"
28151912Sphk#include "search.h"
29151912Sphk#include "command.h"
30151912Sphk
31151912Sphkextern "C" const char *Version_string;
32209402Smav
33209371Smavconst char PRE_LABEL_MARKER = '\013';
34209371Smavconst char POST_LABEL_MARKER = '\014';
35209371Smavconst char LABEL_MARKER = '\015'; // label_type is added on
36209371Smav
37151912Sphk#define FORCE_LEFT_BRACKET 04
38159217Snjl#define FORCE_RIGHT_BRACKET 010
39151912Sphk
40151912Sphkstatic FILE *outfp = stdout;
41209371Smav
42151912Sphkstring capitalize_fields;
43151912Sphkstring reverse_fields;
44209371Smavstring abbreviate_fields;
45209371Smavstring period_before_last_name = ". ";
46209371Smavstring period_before_initial = ".";
47151912Sphkstring period_before_hyphen = "";
48159217Snjlstring period_before_other = ". ";
49193530Sjkimstring sort_fields;
50193530Sjkimint annotation_field = -1;
51193530Sjkimstring annotation_macro;
52151912Sphkstring discard_fields = "XYZ";
53175385Sjhbstring pre_label = "\\*([.";
54151912Sphkstring post_label = "\\*(.]";
55209371Smavstring sep_label = ", ";
56209371Smavint accumulate = 0;
57209371Smavint move_punctuation = 0;
58209371Smavint abbreviate_label_ranges = 0;
59203062Savgstring label_range_indicator;
60203062Savgint label_in_text = 1;
61203062Savgint label_in_reference = 1;
62151912Sphkint date_as_label = 0;
63151912Sphkint sort_adjacent_labels = 0;
64209371Smav// Join exactly two authors with this.
65169574Stakawatastring join_authors_exactly_two = " and ";
66151931Sscottl// When there are more than two authors join the last two with this.
67151935Sscottlstring join_authors_last_two = ", and ";
68151931Sscottl// Otherwise join authors with this.
69151931Sscottlstring join_authors_default = ", ";
70209371Smavstring separate_label_second_parts = ", ";
71151912Sphk// Use this string to represent that there are other authors.
72209371Smavstring et_al = " et al";
73209371Smav// Use et al only if it can replace at least this many authors.
74209371Smavint et_al_min_elide = 2;
75209371Smav// Use et al only if the total number of authors is at least this.
76209440Smavint et_al_min_total = 3;
77159217Snjl
78209371Smav
79209371Smavint compatible_flag = 0;
80151912Sphk
81209371Smavint short_label_flag = 0;
82209440Smav
83209371Smavstatic int recognize_R1_R2 = 1;
84209371Smav
85209371Smavsearch_list database_list;
86209371Smavint search_default = 1;
87209371Smavstatic int default_database_loaded = 0;
88209371Smav
89209371Smavstatic reference **citation = 0;
90209371Smavstatic int ncitations = 0;
91209371Smavstatic int citation_max = 0;
92209371Smav
93209371Smavstatic reference **reference_hash_table = 0;
94209371Smavstatic int hash_table_size;
95209371Smavstatic int nreferences = 0;
96209371Smav
97209371Smavstatic int need_syncing = 0;
98209371Smavstring pending_line;
99209371Smavstring pending_lf_lines;
100209371Smav
101209371Smavstatic void output_pending_line();
102151912Sphkstatic unsigned immediately_handle_reference(const string &);
103151912Sphkstatic void immediately_output_references();
104159217Snjlstatic unsigned store_reference(const string &);
105209371Smavstatic void divert_to_temporary_file();
106151912Sphkstatic reference *make_reference(const string &, unsigned *);
107159217Snjlstatic void usage(FILE *stream);
108159217Snjlstatic void do_file(const char *);
109159217Snjlstatic void split_punct(string &line, string &punct);
110151912Sphkstatic void output_citation_group(reference **v, int n, label_type, FILE *fp);
111151912Sphkstatic void possibly_load_default_database();
112209371Smav
113151912Sphkint main(int argc, char **argv)
114151912Sphk{
115175385Sjhb  program_name = argv[0];
116151912Sphk  static char stderr_buf[BUFSIZ];
117151912Sphk  setbuf(stderr, stderr_buf);
118175361Sjhb  outfp = stdout;
119209371Smav  int finished_options = 0;
120175361Sjhb  int bib_flag = 0;
121175361Sjhb  int done_spec = 0;
122175385Sjhb
123175385Sjhb  for (--argc, ++argv;
124209440Smav       !finished_options && argc > 0 && argv[0][0] == '-'
125209440Smav       && argv[0][1] != '\0';
126209440Smav       argv++, argc--) {
127209440Smav    const char *opt = argv[0] + 1;
128185103Sjkim    while (opt != 0 && *opt != '\0') {
129185103Sjkim      switch (*opt) {
130175361Sjhb      case 'C':
131175361Sjhb	compatible_flag = 1;
132175361Sjhb	opt++;
133209371Smav	break;
134175361Sjhb      case 'B':
135175361Sjhb	bib_flag = 1;
136175385Sjhb	label_in_reference = 0;
137175385Sjhb	label_in_text = 0;
138185103Sjkim	++opt;
139185103Sjkim	if (*opt == '\0') {
140175361Sjhb	  annotation_field = 'X';
141175361Sjhb	  annotation_macro = "AP";
142209371Smav	}
143209371Smav	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
144209371Smav	  annotation_field = opt[0];
145209371Smav	  annotation_macro = opt + 2;
146209371Smav	}
147209371Smav	opt = 0;
148209371Smav	break;
149209371Smav      case 'P':
150209371Smav	move_punctuation = 1;
151209371Smav	opt++;
152209371Smav	break;
153209371Smav      case 'R':
154209371Smav	recognize_R1_R2 = 0;
155209371Smav	opt++;
156209371Smav	break;
157209371Smav      case 'S':
158209371Smav	// Not a very useful spec.
159209371Smav	set_label_spec("(A.n|Q)', '(D.y|D)");
160209371Smav	done_spec = 1;
161210290Smav	pre_label = " (";
162210290Smav	post_label = ")";
163210290Smav	sep_label = "; ";
164210290Smav	opt++;
165210290Smav	break;
166210290Smav      case 'V':
167209371Smav	verify_flag = 1;
168209371Smav	opt++;
169209371Smav	break;
170209371Smav      case 'f':
171209371Smav	{
172209371Smav	  const char *num = 0;
173209371Smav	  if (*++opt == '\0') {
174209371Smav	    if (argc > 1) {
175209371Smav	      num = *++argv;
176209371Smav	      --argc;
177209371Smav	    }
178209371Smav	    else {
179209371Smav	      error("option `f' requires an argument");
180209371Smav	      usage(stderr);
181209371Smav	      exit(1);
182209371Smav	    }
183209371Smav	  }
184209371Smav	  else {
185209371Smav	    num = opt;
186209371Smav	    opt = 0;
187209371Smav	  }
188209371Smav	  const char *ptr;
189209371Smav	  for (ptr = num; *ptr; ptr++)
190209371Smav	    if (!csdigit(*ptr)) {
191209371Smav	      error("bad character `%1' in argument to -f option", *ptr);
192209371Smav	      break;
193209371Smav	    }
194209371Smav	  if (*ptr == '\0') {
195209371Smav	    string spec;
196209371Smav	    spec = '%';
197209371Smav	    spec += num;
198209371Smav	    spec += '\0';
199209371Smav	    set_label_spec(spec.contents());
200209371Smav	    done_spec = 1;
201209371Smav	  }
202209371Smav	  break;
203209371Smav	}
204209371Smav      case 'b':
205209371Smav	label_in_text = 0;
206209371Smav	label_in_reference = 0;
207209371Smav	opt++;
208209371Smav	break;
209209371Smav      case 'e':
210209371Smav	accumulate = 1;
211209371Smav	opt++;
212209371Smav	break;
213209371Smav      case 'c':
214209371Smav	capitalize_fields = ++opt;
215209371Smav	opt = 0;
216209371Smav	break;
217209371Smav      case 'k':
218209371Smav	{
219209371Smav	  char buf[5];
220209990Smav	  if (csalpha(*++opt))
221209990Smav	    buf[0] = *opt++;
222209371Smav	  else {
223209371Smav	    if (*opt != '\0')
224209371Smav	      error("bad field name `%1'", *opt++);
225209371Smav	    buf[0] = 'L';
226209371Smav	  }
227209371Smav	  buf[1] = '~';
228209371Smav	  buf[2] = '%';
229209371Smav	  buf[3] = 'a';
230209371Smav	  buf[4] = '\0';
231209371Smav	  set_label_spec(buf);
232209371Smav	  done_spec = 1;
233209371Smav	}
234209371Smav	break;
235209371Smav      case 'a':
236209371Smav	{
237209371Smav	  const char *ptr;
238209371Smav	  for (ptr = ++opt; *ptr; ptr++)
239209371Smav	    if (!csdigit(*ptr)) {
240209371Smav	      error("argument to `a' option not a number");
241209371Smav	      break;
242209371Smav	    }
243209371Smav	  if (*ptr == '\0') {
244209371Smav	    reverse_fields = 'A';
245209371Smav	    reverse_fields += opt;
246208436Smav	  }
247209371Smav	  opt = 0;
248208436Smav	}
249208436Smav	break;
250208436Smav      case 'i':
251208436Smav	linear_ignore_fields = ++opt;
252208438Smav	opt = 0;
253208436Smav	break;
254208436Smav      case 'l':
255208436Smav	{
256208436Smav	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
257208436Smav	  strcpy(buf, "A.n");
258208436Smav	  if (*++opt != '\0' && *opt != ',') {
259208436Smav	    char *ptr;
260209371Smav	    long n = strtol(opt, &ptr, 10);
261209371Smav	    if (n == 0 && ptr == opt) {
262208436Smav	      error("bad integer `%1' in `l' option", opt);
263208436Smav	      opt = 0;
264208436Smav	      break;
265208436Smav	    }
266169592Snjl	    if (n < 0)
267172489Snjl	      n = 0;
268209371Smav	    opt = ptr;
269169574Stakawata	    sprintf(strchr(buf, '\0'), "+%ld", n);
270169574Stakawata	  }
271169574Stakawata	  strcat(buf, "D.y");
272169574Stakawata	  if (*opt == ',')
273208436Smav	    opt++;
274169574Stakawata	  if (*opt != '\0') {
275172489Snjl	    char *ptr;
276209371Smav	    long n = strtol(opt, &ptr, 10);
277172489Snjl	    if (n == 0 && ptr == opt) {
278208436Smav	      error("bad integer `%1' in `l' option", opt);
279208436Smav	      opt = 0;
280208436Smav	      break;
281208436Smav	    }
282208436Smav	    if (n < 0)
283208436Smav	      n = 0;
284208436Smav	    sprintf(strchr(buf, '\0'), "-%ld", n);
285208436Smav	    opt = ptr;
286209371Smav	    if (*opt != '\0')
287208436Smav	      error("argument to `l' option not of form `m,n'");
288208436Smav	  }
289208436Smav	  strcat(buf, "%a");
290208436Smav	  if (!set_label_spec(buf))
291209371Smav	    assert(0);
292208436Smav	  done_spec = 1;
293208436Smav	}
294208436Smav	break;
295208436Smav      case 'n':
296208436Smav	search_default = 0;
297208436Smav	opt++;
298169574Stakawata	break;
299169574Stakawata      case 'p':
300169574Stakawata	{
301151912Sphk	  const char *filename = 0;
302209371Smav	  if (*++opt == '\0') {
303151912Sphk	    if (argc > 1) {
304159217Snjl	      filename = *++argv;
305159217Snjl	      argc--;
306169592Snjl	    }
307151912Sphk	    else {
308199016Savg	      error("option `p' requires an argument");
309208436Smav	      usage(stderr);
310169592Snjl	      exit(1);
311151912Sphk	    }
312159217Snjl	  }
313151912Sphk	  else {
314151912Sphk	    filename = opt;
315151912Sphk	    opt = 0;
316151912Sphk	  }
317209371Smav	  database_list.add_file(filename);
318151912Sphk	}
319209371Smav	break;
320209371Smav      case 's':
321209371Smav	if (*++opt == '\0')
322209371Smav	  sort_fields = "AD";
323209371Smav	else {
324209371Smav	  sort_fields = opt;
325209371Smav	  opt = 0;
326151912Sphk	}
327151912Sphk	accumulate = 1;
328151912Sphk	break;
329151912Sphk      case 't':
330151912Sphk	{
331151912Sphk	  char *ptr;
332151912Sphk	  long n = strtol(opt, &ptr, 10);
333209371Smav	  if (n == 0 && ptr == opt) {
334209371Smav	    error("bad integer `%1' in `t' option", opt);
335159217Snjl	    opt = 0;
336159217Snjl	    break;
337159217Snjl	  }
338151912Sphk	  if (n < 1)
339159217Snjl	    n = 1;
340159217Snjl	  linear_truncate_len = int(n);
341159217Snjl	  opt = ptr;
342159217Snjl	  break;
343159217Snjl	}
344159217Snjl      case '-':
345159217Snjl	if (opt[1] == '\0') {
346151912Sphk	  finished_options = 1;
347171547Snjl	  opt++;
348175361Sjhb	  break;
349171547Snjl	}
350159217Snjl	if (strcmp(opt,"-version")==0) {
351175385Sjhb      case 'v':
352175361Sjhb	  printf("GNU refer (groff) version %s\n", Version_string);
353175361Sjhb	  exit(0);
354175361Sjhb	  break;
355175361Sjhb	}
356175361Sjhb	if (strcmp(opt,"-help")==0) {
357175361Sjhb	  usage(stdout);
358175361Sjhb	  exit(0);
359209371Smav	  break;
360209440Smav	}
361209440Smav	// fall through
362209440Smav      default:
363209440Smav	error("unrecognized option `%1'", *opt);
364209371Smav	usage(stderr);
365209371Smav	exit(1);
366209371Smav	break;
367209371Smav      }
368209371Smav    }
369209371Smav  }
370209371Smav  if (!done_spec)
371209371Smav    set_label_spec("%1");
372209371Smav  if (argc <= 0) {
373159217Snjl    if (bib_flag)
374159217Snjl      do_bib("-");
375209371Smav    else
376209440Smav      do_file("-");
377209440Smav  }
378209440Smav  else {
379209440Smav    for (int i = 0; i < argc; i++) {
380159217Snjl      if (bib_flag)
381209371Smav	do_bib(argv[i]);
382209371Smav      else
383209371Smav	do_file(argv[i]);
384209371Smav    }
385209371Smav  }
386209371Smav  if (accumulate)
387209371Smav    output_references();
388209371Smav  if (fflush(stdout) < 0)
389209371Smav    fatal("output error");
390209371Smav  return 0;
391209371Smav}
392209371Smav
393209371Smavstatic void usage(FILE *stream)
394209371Smav{
395209371Smav  fprintf(stream,
396209371Smav"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
397209371Smav"       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
398209371Smav	  program_name);
399209371Smav}
400159217Snjl
401209371Smavstatic void possibly_load_default_database()
402171547Snjl{
403171547Snjl  if (search_default && !default_database_loaded) {
404171547Snjl    char *filename = getenv("REFER");
405171547Snjl    if (filename)
406175385Sjhb      database_list.add_file(filename);
407171547Snjl    else
408175385Sjhb      database_list.add_file(DEFAULT_INDEX, 1);
409171547Snjl    default_database_loaded = 1;
410171547Snjl  }
411175361Sjhb}
412171547Snjl
413171547Snjlstatic int is_list(const string &str)
414171547Snjl{
415208436Smav  const char *start = str.contents();
416208436Smav  const char *end = start + str.length();
417209371Smav  while (end > start && csspace(end[-1]))
418209371Smav    end--;
419209371Smav  while (start < end && csspace(*start))
420209371Smav    start++;
421209371Smav  return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
422209371Smav}
423209371Smav
424208436Smavstatic void do_file(const char *filename)
425209371Smav{
426209371Smav  FILE *fp;
427209371Smav  if (strcmp(filename, "-") == 0) {
428209371Smav    fp = stdin;
429209440Smav  }
430209440Smav  else {
431209440Smav    errno = 0;
432209440Smav    fp = fopen(filename, "r");
433209440Smav    if (fp == 0) {
434209440Smav      error("can't open `%1': %2", filename, strerror(errno));
435209440Smav      return;
436209440Smav    }
437209440Smav  }
438209440Smav  current_filename = filename;
439209440Smav  fprintf(outfp, ".lf 1 %s\n", filename);
440209440Smav  string line;
441209440Smav  current_lineno = 0;
442209440Smav  for (;;) {
443209371Smav    line.clear();
444209371Smav    for (;;) {
445209371Smav      int c = getc(fp);
446209371Smav      if (c == EOF) {
447209371Smav	if (line.length() > 0)
448209371Smav	  line += '\n';
449209371Smav	break;
450209371Smav      }
451209371Smav      if (invalid_input_char(c))
452209371Smav	error("invalid input character code %1", c);
453209371Smav      else {
454209371Smav	line += c;
455209371Smav	if (c == '\n')
456209440Smav	  break;
457209440Smav      }
458209371Smav    }
459209440Smav    int len = line.length();
460209371Smav    if (len == 0)
461209371Smav      break;
462209371Smav    current_lineno++;
463209371Smav    if (len >= 2 && line[0] == '.' && line[1] == '[') {
464209440Smav      int start_lineno = current_lineno;
465209440Smav      int start_of_line = 1;
466209440Smav      string str;
467209440Smav      string post;
468209440Smav      string pre(line.contents() + 2, line.length() - 3);
469209440Smav      for (;;) {
470209371Smav	int c = getc(fp);
471209440Smav	if (c == EOF) {
472209440Smav	  error_with_file_and_line(current_filename, start_lineno,
473209440Smav				   "missing `.]' line");
474209440Smav	  break;
475209371Smav	}
476209371Smav	if (start_of_line)
477209371Smav	  current_lineno++;
478209371Smav	if (start_of_line && c == '.') {
479209440Smav	  int d = getc(fp);
480209440Smav	  if (d == ']') {
481209440Smav	    while ((d = getc(fp)) != '\n' && d != EOF) {
482209371Smav	      if (invalid_input_char(d))
483209371Smav		error("invalid input character code %1", d);
484209371Smav	      else
485209371Smav		post += d;
486209371Smav	    }
487209440Smav	    break;
488209440Smav	  }
489209371Smav	  if (d != EOF)
490209371Smav	    ungetc(d, fp);
491209371Smav	}
492209371Smav	if (invalid_input_char(c))
493209440Smav	  error("invalid input character code %1", c);
494209440Smav	else
495209440Smav	  str += c;
496209440Smav	start_of_line = (c == '\n');
497209440Smav      }
498209440Smav      if (is_list(str)) {
499209440Smav	output_pending_line();
500209440Smav	if (accumulate)
501209440Smav	  output_references();
502209440Smav	else
503209440Smav	  error("found `$LIST$' but not accumulating references");
504209440Smav      }
505209440Smav      else {
506209440Smav	unsigned flags = (accumulate
507209440Smav			  ? store_reference(str)
508209440Smav			  : immediately_handle_reference(str));
509209440Smav	if (label_in_text) {
510209440Smav	  if (accumulate && outfp == stdout)
511209440Smav	    divert_to_temporary_file();
512209440Smav	  if (pending_line.length() == 0) {
513209440Smav	    warning("can't attach citation to previous line");
514209440Smav	  }
515209371Smav	  else
516209371Smav	    pending_line.set_length(pending_line.length() - 1);
517209371Smav	  string punct;
518209371Smav	  if (move_punctuation)
519209371Smav	    split_punct(pending_line, punct);
520209371Smav	  int have_text = pre.length() > 0 || post.length() > 0;
521209371Smav	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
522209371Smav					       |FORCE_RIGHT_BRACKET));
523209371Smav	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
524209371Smav	    pending_line += PRE_LABEL_MARKER;
525209371Smav	  pending_line += pre;
526209371Smav	  char lm = LABEL_MARKER + (int)lt;
527209371Smav	  pending_line += lm;
528209371Smav	  pending_line += post;
529209371Smav	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
530209371Smav	    pending_line += POST_LABEL_MARKER;
531209371Smav	  pending_line += punct;
532209371Smav	  pending_line += '\n';
533209371Smav	}
534209371Smav      }
535209371Smav      need_syncing = 1;
536209371Smav    }
537209371Smav    else if (len >= 4
538209371Smav	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
539209371Smav	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
540209371Smav      pending_lf_lines += line;
541209371Smav      line += '\0';
542209440Smav      if (interpret_lf_args(line.contents() + 3))
543209371Smav	current_lineno--;
544209440Smav    }
545209440Smav    else if (recognize_R1_R2
546209440Smav	     && len >= 4
547209371Smav	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
548209371Smav	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
549209371Smav      line.clear();
550209371Smav      int start_of_line = 1;
551209371Smav      int start_lineno = current_lineno;
552209371Smav      for (;;) {
553209371Smav	int c = getc(fp);
554209371Smav	if (c != EOF && start_of_line)
555209371Smav	  current_lineno++;
556209371Smav	if (start_of_line && c == '.') {
557209371Smav	  c = getc(fp);
558209371Smav	  if (c == 'R') {
559209371Smav	    c = getc(fp);
560209371Smav	    if (c == '2') {
561209371Smav	      c = getc(fp);
562209371Smav	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
563209371Smav		while (c != EOF && c != '\n')
564209431Smav		  c = getc(fp);
565209371Smav		break;
566209371Smav	      }
567209371Smav	      else {
568209371Smav		line += '.';
569209371Smav		line += 'R';
570209371Smav		line += '2';
571209371Smav	      }
572209371Smav	    }
573209371Smav	    else {
574209371Smav	      line += '.';
575209371Smav	      line += 'R';
576209371Smav	    }
577209371Smav	  }
578209371Smav	  else
579209371Smav	    line += '.';
580209371Smav	}
581209371Smav	if (c == EOF) {
582209371Smav	  error_with_file_and_line(current_filename, start_lineno,
583209371Smav				   "missing `.R2' line");
584209371Smav	  break;
585209371Smav	}
586209371Smav	if (invalid_input_char(c))
587210290Smav	  error("invalid input character code %1", int(c));
588210298Smav	else {
589210298Smav	  line += c;
590210290Smav	  start_of_line = c == '\n';
591210298Smav	}
592209371Smav      }
593209371Smav      output_pending_line();
594209371Smav      if (accumulate)
595209371Smav	output_references();
596209371Smav      else
597209371Smav	nreferences = 0;
598209371Smav      process_commands(line, current_filename, start_lineno + 1);
599209371Smav      need_syncing = 1;
600159217Snjl    }
601159217Snjl    else {
602159217Snjl      output_pending_line();
603159217Snjl      pending_line = line;
604209371Smav    }
605159217Snjl  }
606159217Snjl  need_syncing = 0;
607159217Snjl  output_pending_line();
608159217Snjl  if (fp != stdin)
609159217Snjl    fclose(fp);
610159217Snjl}
611159217Snjl
612168010Snjlclass label_processing_state {
613209371Smav  enum {
614175361Sjhb    NORMAL,
615209371Smav    PENDING_LABEL,
616175361Sjhb    PENDING_LABEL_POST,
617175361Sjhb    PENDING_LABEL_POST_PRE,
618175361Sjhb    PENDING_POST
619175361Sjhb    } state;
620175361Sjhb  label_type type;		// type of pending labels
621175361Sjhb  int count;			// number of pending labels
622175361Sjhb  reference **rptr;		// pointer to next reference
623175361Sjhb  int rcount;			// number of references left
624175361Sjhb  FILE *fp;
625175361Sjhb  int handle_pending(int c);
626175361Sjhbpublic:
627175361Sjhb  label_processing_state(reference **, int, FILE *);
628175361Sjhb  ~label_processing_state();
629209371Smav  void process(int c);
630168010Snjl};
631209371Smav
632209371Smavstatic void output_pending_line()
633209371Smav{
634168010Snjl  if (label_in_text && !accumulate && ncitations > 0) {
635168010Snjl    label_processing_state state(citation, ncitations, outfp);
636168010Snjl    int len = pending_line.length();
637175361Sjhb    for (int i = 0; i < len; i++)
638209371Smav      state.process((unsigned char)(pending_line[i]));
639209371Smav  }
640209371Smav  else
641209371Smav    put_string(pending_line, outfp);
642209440Smav  pending_line.clear();
643209371Smav  if (pending_lf_lines.length() > 0) {
644209371Smav    put_string(pending_lf_lines, outfp);
645209371Smav    pending_lf_lines.clear();
646209371Smav  }
647209371Smav  if (!accumulate)
648209371Smav    immediately_output_references();
649209371Smav  if (need_syncing) {
650209371Smav    fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
651209371Smav    need_syncing = 0;
652209371Smav  }
653209371Smav}
654209371Smav
655209371Smavstatic void split_punct(string &line, string &punct)
656209371Smav{
657209371Smav  const char *start = line.contents();
658209371Smav  const char *end = start + line.length();
659209371Smav  const char *ptr = start;
660209371Smav  const char *last_token_start = 0;
661209371Smav  for (;;) {
662209371Smav    if (ptr >= end)
663209371Smav      break;
664209371Smav    last_token_start = ptr;
665209371Smav    if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
666209371Smav	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
667209371Smav      ptr++;
668209371Smav    else if (!get_token(&ptr, end))
669209371Smav      break;
670209371Smav  }
671209371Smav  if (last_token_start) {
672209371Smav    const token_info *ti = lookup_token(last_token_start, end);
673209371Smav    if (ti->is_punct()) {
674209371Smav      punct.append(last_token_start, end - last_token_start);
675168010Snjl      line.set_length(last_token_start - start);
676168010Snjl    }
677168010Snjl  }
678159217Snjl}
679159217Snjl
680209371Smavstatic void divert_to_temporary_file()
681159217Snjl{
682151912Sphk  outfp = xtmpfile();
683151912Sphk}
684151912Sphk
685151912Sphkstatic void store_citation(reference *ref)
686151912Sphk{
687151912Sphk  if (ncitations >= citation_max) {
688151912Sphk    if (citation == 0)
689151912Sphk      citation = new reference*[citation_max = 100];
690175385Sjhb    else {
691151912Sphk      reference **old_citation = citation;
692175385Sjhb      citation_max *= 2;
693151912Sphk      citation = new reference *[citation_max];
694175385Sjhb      memcpy(citation, old_citation, ncitations*sizeof(reference *));
695151912Sphk      a_delete old_citation;
696151912Sphk    }
697151912Sphk  }
698151912Sphk  citation[ncitations++] = ref;
699151912Sphk}
700151912Sphk
701159217Snjlstatic unsigned store_reference(const string &str)
702151912Sphk{
703151912Sphk  if (reference_hash_table == 0) {
704159217Snjl    reference_hash_table = new reference *[17];
705151912Sphk    hash_table_size = 17;
706151912Sphk    for (int i = 0; i < hash_table_size; i++)
707209371Smav      reference_hash_table[i] = 0;
708209371Smav  }
709209371Smav  unsigned flags;
710209371Smav  reference *ref = make_reference(str, &flags);
711209371Smav  ref->compute_hash_code();
712209371Smav  unsigned h = ref->hash();
713209371Smav  reference **ptr;
714209371Smav  for (ptr = reference_hash_table + (h % hash_table_size);
715209371Smav       *ptr != 0;
716209371Smav       ((ptr == reference_hash_table)
717209371Smav	? (ptr = reference_hash_table + hash_table_size - 1)
718209371Smav	: --ptr))
719209371Smav    if (same_reference(**ptr, *ref))
720209371Smav      break;
721209371Smav  if (*ptr != 0) {
722209371Smav    if (ref->is_merged())
723209371Smav      warning("fields ignored because reference already used");
724209371Smav    delete ref;
725209371Smav    ref = *ptr;
726209371Smav  }
727209371Smav  else {
728209371Smav    *ptr = ref;
729209371Smav    ref->set_number(nreferences);
730209371Smav    nreferences++;
731209371Smav    ref->pre_compute_label();
732209371Smav    ref->compute_sort_key();
733209371Smav    if (nreferences*2 >= hash_table_size) {
734209371Smav      // Rehash it.
735209371Smav      reference **old_table = reference_hash_table;
736209371Smav      int old_size = hash_table_size;
737151912Sphk      hash_table_size = next_size(hash_table_size);
738209371Smav      reference_hash_table = new reference*[hash_table_size];
739209371Smav      int i;
740209371Smav      for (i = 0; i < hash_table_size; i++)
741209371Smav	reference_hash_table[i] = 0;
742209371Smav      for (i = 0; i < old_size; i++)
743209371Smav	if (old_table[i]) {
744151912Sphk	  reference **p;
745209371Smav	  for (p = (reference_hash_table
746209371Smav				+ (old_table[i]->hash() % hash_table_size));
747209371Smav	       *p;
748209371Smav	       ((p == reference_hash_table)
749151912Sphk		? (p = reference_hash_table + hash_table_size - 1)
750151912Sphk		: --p))
751151912Sphk	    ;
752209371Smav	  *p = old_table[i];
753209371Smav	}
754209371Smav      a_delete old_table;
755209371Smav    }
756151912Sphk  }
757151912Sphk  if (label_in_text)
758209371Smav    store_citation(ref);
759209371Smav  return flags;
760}
761
762unsigned immediately_handle_reference(const string &str)
763{
764  unsigned flags;
765  reference *ref = make_reference(str, &flags);
766  ref->set_number(nreferences);
767  if (label_in_text || label_in_reference) {
768    ref->pre_compute_label();
769    ref->immediate_compute_label();
770  }
771  nreferences++;
772  store_citation(ref);
773  return flags;
774}
775
776static void immediately_output_references()
777{
778  for (int i = 0; i < ncitations; i++) {
779    reference *ref = citation[i];
780    if (label_in_reference) {
781      fputs(".ds [F ", outfp);
782      const string &label = ref->get_label(NORMAL_LABEL);
783      if (label.length() > 0
784	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
785	putc('"', outfp);
786      put_string(label, outfp);
787      putc('\n', outfp);
788    }
789    ref->output(outfp);
790    delete ref;
791  }
792  ncitations = 0;
793}
794
795static void output_citation_group(reference **v, int n, label_type type,
796				  FILE *fp)
797{
798  if (sort_adjacent_labels) {
799    // Do an insertion sort.  Usually n will be very small.
800    for (int i = 1; i < n; i++) {
801      int num = v[i]->get_number();
802      reference *temp = v[i];
803      int j;
804      for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
805	v[j + 1] = v[j];
806      v[j + 1] = temp;
807    }
808  }
809  // This messes up if !accumulate.
810  if (accumulate && n > 1) {
811    // remove duplicates
812    int j = 1;
813    for (int i = 1; i < n; i++)
814      if (v[i]->get_label(type) != v[i - 1]->get_label(type))
815	v[j++] = v[i];
816    n = j;
817  }
818  string merged_label;
819  for (int i = 0; i < n; i++) {
820    int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
821    if (nmerged > 0) {
822      put_string(merged_label, fp);
823      i += nmerged;
824    }
825    else
826      put_string(v[i]->get_label(type), fp);
827    if (i < n - 1)
828      put_string(sep_label, fp);
829  }
830}
831
832
833label_processing_state::label_processing_state(reference **p, int n, FILE *f)
834: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
835{
836}
837
838label_processing_state::~label_processing_state()
839{
840  int handled = handle_pending(EOF);
841  assert(!handled);
842  assert(rcount == 0);
843}
844
845int label_processing_state::handle_pending(int c)
846{
847  switch (state) {
848  case NORMAL:
849    break;
850  case PENDING_LABEL:
851    if (c == POST_LABEL_MARKER) {
852      state = PENDING_LABEL_POST;
853      return 1;
854    }
855    else {
856      output_citation_group(rptr, count, type, fp);
857      rptr += count ;
858      rcount -= count;
859      state = NORMAL;
860    }
861    break;
862  case PENDING_LABEL_POST:
863    if (c == PRE_LABEL_MARKER) {
864      state = PENDING_LABEL_POST_PRE;
865      return 1;
866    }
867    else {
868      output_citation_group(rptr, count, type, fp);
869      rptr += count;
870      rcount -= count;
871      put_string(post_label, fp);
872      state = NORMAL;
873    }
874    break;
875  case PENDING_LABEL_POST_PRE:
876    if (c >= LABEL_MARKER
877	&& c < LABEL_MARKER + N_LABEL_TYPES
878	&& c - LABEL_MARKER == type) {
879      count += 1;
880      state = PENDING_LABEL;
881      return 1;
882    }
883    else {
884      output_citation_group(rptr, count, type, fp);
885      rptr += count;
886      rcount -= count;
887      put_string(sep_label, fp);
888      state = NORMAL;
889    }
890    break;
891  case PENDING_POST:
892    if (c == PRE_LABEL_MARKER) {
893      put_string(sep_label, fp);
894      state = NORMAL;
895      return 1;
896    }
897    else {
898      put_string(post_label, fp);
899      state = NORMAL;
900    }
901    break;
902  }
903  return 0;
904}
905
906void label_processing_state::process(int c)
907{
908  if (handle_pending(c))
909    return;
910  assert(state == NORMAL);
911  switch (c) {
912  case PRE_LABEL_MARKER:
913    put_string(pre_label, fp);
914    state = NORMAL;
915    break;
916  case POST_LABEL_MARKER:
917    state = PENDING_POST;
918    break;
919  case LABEL_MARKER:
920  case LABEL_MARKER + 1:
921    count = 1;
922    state = PENDING_LABEL;
923    type = label_type(c - LABEL_MARKER);
924    break;
925  default:
926    state = NORMAL;
927    putc(c, fp);
928    break;
929  }
930}
931
932extern "C" {
933
934int rcompare(const void *p1, const void *p2)
935{
936  return compare_reference(**(reference **)p1, **(reference **)p2);
937}
938
939}
940
941void output_references()
942{
943  assert(accumulate);
944  if (!hash_table_size) {
945    error("nothing to reference (probably `bibliography' before `sort')");
946    accumulate = 0;
947    nreferences = 0;
948    return;
949  }
950  if (nreferences > 0) {
951    int j = 0;
952    int i;
953    for (i = 0; i < hash_table_size; i++)
954      if (reference_hash_table[i] != 0)
955	reference_hash_table[j++] = reference_hash_table[i];
956    assert(j == nreferences);
957    for (; j < hash_table_size; j++)
958      reference_hash_table[j] = 0;
959    qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
960    for (i = 0; i < nreferences; i++)
961      reference_hash_table[i]->set_number(i);
962    compute_labels(reference_hash_table, nreferences);
963  }
964  if (outfp != stdout) {
965    rewind(outfp);
966    {
967      label_processing_state state(citation, ncitations, stdout);
968      int c;
969      while ((c = getc(outfp)) != EOF)
970	state.process(c);
971    }
972    ncitations = 0;
973    fclose(outfp);
974    outfp = stdout;
975  }
976  if (nreferences > 0) {
977    fputs(".]<\n", outfp);
978    for (int i = 0; i < nreferences; i++) {
979      if (sort_fields.length() > 0)
980	reference_hash_table[i]->print_sort_key_comment(outfp);
981      if (label_in_reference) {
982	fputs(".ds [F ", outfp);
983	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
984	if (label.length() > 0
985	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
986	  putc('"', outfp);
987	put_string(label, outfp);
988	putc('\n', outfp);
989      }
990      reference_hash_table[i]->output(outfp);
991      delete reference_hash_table[i];
992      reference_hash_table[i] = 0;
993    }
994    fputs(".]>\n", outfp);
995    nreferences = 0;
996  }
997  clear_labels();
998}
999
1000static reference *find_reference(const char *query, int query_len)
1001{
1002  // This is so that error messages look better.
1003  while (query_len > 0 && csspace(query[query_len - 1]))
1004    query_len--;
1005  string str;
1006  for (int i = 0; i < query_len; i++)
1007    str += query[i] == '\n' ? ' ' : query[i];
1008  str += '\0';
1009  possibly_load_default_database();
1010  search_list_iterator iter(&database_list, str.contents());
1011  reference_id rid;
1012  const char *start;
1013  int len;
1014  if (!iter.next(&start, &len, &rid)) {
1015    error("no matches for `%1'", str.contents());
1016    return 0;
1017  }
1018  const char *end = start + len;
1019  while (start < end) {
1020    if (*start == '%')
1021      break;
1022    while (start < end && *start++ != '\n')
1023      ;
1024  }
1025  if (start >= end) {
1026    error("found a reference for `%1' but it didn't contain any fields",
1027	  str.contents());
1028    return 0;
1029  }
1030  reference *result = new reference(start, end - start, &rid);
1031  if (iter.next(&start, &len, &rid))
1032    warning("multiple matches for `%1'", str.contents());
1033  return result;
1034}
1035
1036static reference *make_reference(const string &str, unsigned *flagsp)
1037{
1038  const char *start = str.contents();
1039  const char *end = start + str.length();
1040  const char *ptr = start;
1041  while (ptr < end) {
1042    if (*ptr == '%')
1043      break;
1044    while (ptr < end && *ptr++ != '\n')
1045      ;
1046  }
1047  *flagsp = 0;
1048  for (; start < ptr; start++) {
1049    if (*start == '#')
1050      *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1051					   | FORCE_LEFT_BRACKET)));
1052    else if (*start == '[')
1053      *flagsp |= FORCE_LEFT_BRACKET;
1054    else if (*start == ']')
1055      *flagsp |= FORCE_RIGHT_BRACKET;
1056    else if (!csspace(*start))
1057      break;
1058  }
1059  if (start >= end) {
1060    error("empty reference");
1061    return new reference;
1062  }
1063  reference *database_ref = 0;
1064  if (start < ptr)
1065    database_ref = find_reference(start, ptr - start);
1066  reference *inline_ref = 0;
1067  if (ptr < end)
1068    inline_ref = new reference(ptr, end - ptr);
1069  if (inline_ref) {
1070    if (database_ref) {
1071      database_ref->merge(*inline_ref);
1072      delete inline_ref;
1073      return database_ref;
1074    }
1075    else
1076      return inline_ref;
1077  }
1078  else if (database_ref)
1079    return database_ref;
1080  else
1081    return new reference;
1082}
1083
1084static void do_ref(const string &str)
1085{
1086  if (accumulate)
1087    (void)store_reference(str);
1088  else {
1089    (void)immediately_handle_reference(str);
1090    immediately_output_references();
1091  }
1092}
1093
1094static void trim_blanks(string &str)
1095{
1096  const char *start = str.contents();
1097  const char *end = start + str.length();
1098  while (end > start && end[-1] != '\n' && csspace(end[-1]))
1099    --end;
1100  str.set_length(end - start);
1101}
1102
1103void do_bib(const char *filename)
1104{
1105  FILE *fp;
1106  if (strcmp(filename, "-") == 0)
1107    fp = stdin;
1108  else {
1109    errno = 0;
1110    fp = fopen(filename, "r");
1111    if (fp == 0) {
1112      error("can't open `%1': %2", filename, strerror(errno));
1113      return;
1114    }
1115    current_filename = filename;
1116  }
1117  enum {
1118    START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1119    } state = START;
1120  string body;
1121  for (;;) {
1122    int c = getc(fp);
1123    if (c == EOF)
1124      break;
1125    if (invalid_input_char(c)) {
1126      error("invalid input character code %1", c);
1127      continue;
1128    }
1129    switch (state) {
1130    case START:
1131      if (c == '%') {
1132	body = c;
1133	state = BODY;
1134      }
1135      else if (c != '\n')
1136	state = MIDDLE;
1137      break;
1138    case MIDDLE:
1139      if (c == '\n')
1140	state = START;
1141      break;
1142    case BODY:
1143      body += c;
1144      if (c == '\n')
1145	state = BODY_START;
1146      break;
1147    case BODY_START:
1148      if (c == '\n') {
1149	do_ref(body);
1150	state = START;
1151      }
1152      else if (c == '.')
1153	state = BODY_DOT;
1154      else if (csspace(c)) {
1155	state = BODY_BLANK;
1156	body += c;
1157      }
1158      else {
1159	body += c;
1160	state = BODY;
1161      }
1162      break;
1163    case BODY_BLANK:
1164      if (c == '\n') {
1165	trim_blanks(body);
1166	do_ref(body);
1167	state = START;
1168      }
1169      else if (csspace(c))
1170	body += c;
1171      else {
1172	body += c;
1173	state = BODY;
1174      }
1175      break;
1176    case BODY_DOT:
1177      if (c == ']') {
1178	do_ref(body);
1179	state = MIDDLE;
1180      }
1181      else {
1182	body += '.';
1183	body += c;
1184	state = c == '\n' ? BODY_START : BODY;
1185      }
1186      break;
1187    default:
1188      assert(0);
1189    }
1190    if (c == '\n')
1191      current_lineno++;
1192  }
1193  switch (state) {
1194  case START:
1195  case MIDDLE:
1196    break;
1197  case BODY:
1198    body += '\n';
1199    do_ref(body);
1200    break;
1201  case BODY_DOT:
1202  case BODY_START:
1203    do_ref(body);
1204    break;
1205  case BODY_BLANK:
1206    trim_blanks(body);
1207    do_ref(body);
1208    break;
1209  }
1210  fclose(fp);
1211}
1212
// from the Dragon Book

// Hash the LEN bytes starting at S.  Each byte is mixed in by shifting the
// running value left four bits and adding the byte; whenever any of the
// top four bits (mask 0xf0000000) become set they are folded back into the
// low bits and cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned hash = 0;
  for (int i = 0; i < len; i++) {
    hash = (hash << 4) + s[i];
    unsigned top = hash & 0xf0000000;
    if (top != 0)
      hash ^= (top >> 24) ^ top;
  }
  return hash;
}
1229
// Return the smallest entry of a fixed table of hash-table sizes that is
// strictly greater than N.  Asserts that N is smaller than the largest
// entry in the table.
int next_size(int n)
{
  // Zero-terminated, in increasing order.
  static const int sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  int idx = 0;
  while (sizes[idx] <= n && sizes[idx] != 0)
    idx++;
  assert(sizes[idx] != 0);
  return sizes[idx];
}
1244
1245