refer.cpp revision 114402
1// -*- C++ -*-
2/* Copyright (C) 1989-1992, 2000, 2001, 2002 Free Software Foundation, Inc.
3     Written by James Clark (jjc@jclark.com)
4
5This file is part of groff.
6
7groff is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12groff is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License along
18with groff; see the file COPYING.  If not, write to the Free Software
19Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21#include "refer.h"
22#include "refid.h"
23#include "ref.h"
24#include "token.h"
25#include "search.h"
26#include "command.h"
27
// Version string printed by the -v / --version option.
extern "C" const char *Version_string;

// Control characters that do_file() embeds in pending_line to mark where
// bracket and label text is to be substituted.  They are replaced later
// by label_processing_state.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Flag bits that make_reference() can set in its flags result, alongside
// the label type.  do_file() masks them out before using the remainder
// as a label_type.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010

// Stream that output is currently written to.  It is stdout unless
// divert_to_temporary_file() has redirected it.
static FILE *outfp = stdout;

string capitalize_fields;
string reverse_fields;
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;
int annotation_field = -1;
string annotation_macro;
string discard_fields = "XYZ";
// Text substituted for PRE_LABEL_MARKER and POST_LABEL_MARKER, and the
// separator written between the labels of one citation group.
string pre_label = "\\*([.";
string post_label = "\\*(.]";
string sep_label = ", ";
// Non-zero when references are collected and written out together by
// output_references() instead of immediately after each citation.
int accumulate = 0;
int move_punctuation = 0;
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;
int label_in_reference = 1;
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
string join_authors_exactly_two = " and ";
// When there are more than two authors join the last two with this.
string join_authors_last_two = ", and ";
// Otherwise join authors with this.
string join_authors_default = ", ";
string separate_label_second_parts = ", ";
// Use this string to represent that there are other authors.
string et_al = " et al";
// Use et al only if it can replace at least this many authors.
int et_al_min_elide = 2;
// Use et al only if the total number of authors is at least this.
int et_al_min_total = 3;


// Set by the -C option; relaxes the delimiter check after `.lf', `.R1'
// and `.R2' in do_file().
int compatible_flag = 0;

int short_label_flag = 0;

// Cleared by the -R option; when zero, `.R1'/`.R2' blocks are not
// treated specially by do_file().
static int recognize_R1_R2 = 1;

search_list database_list;
// Cleared by the -n option; when non-zero, the default database is added
// to database_list on the first lookup.
int search_default = 1;
static int default_database_loaded = 0;

// Growable array of the references cited so far, in citation order.
static reference **citation = 0;
static int ncitations = 0;
static int citation_max = 0;

// Open-addressing hash table of accumulated references.
static reference **reference_hash_table = 0;
static int hash_table_size;
static int nreferences = 0;

// Non-zero when an `.lf' line must be written before further input text
// is copied to the output.
static int need_syncing = 0;
// The most recent ordinary text line, held back so that a following
// citation can be attached to it.
string pending_line;
// `.lf' lines read since pending_line was stored; written after it.
string pending_lf_lines;

static void output_pending_line();
static unsigned immediately_handle_reference(const string &);
static void immediately_output_references();
static unsigned store_reference(const string &);
static void divert_to_temporary_file();
static reference *make_reference(const string &, unsigned *);
static void usage(FILE *stream);
static void do_file(const char *);
static void split_punct(string &line, string &punct);
static void output_citation_group(reference **v, int n, label_type, FILE *fp);
static void possibly_load_default_database();
109
110int main(int argc, char **argv)
111{
112  program_name = argv[0];
113  static char stderr_buf[BUFSIZ];
114  setbuf(stderr, stderr_buf);
115  outfp = stdout;
116  int finished_options = 0;
117  int bib_flag = 0;
118  int done_spec = 0;
119
120  for (--argc, ++argv;
121       !finished_options && argc > 0 && argv[0][0] == '-'
122       && argv[0][1] != '\0';
123       argv++, argc--) {
124    const char *opt = argv[0] + 1;
125    while (opt != 0 && *opt != '\0') {
126      switch (*opt) {
127      case 'C':
128	compatible_flag = 1;
129	opt++;
130	break;
131      case 'B':
132	bib_flag = 1;
133	label_in_reference = 0;
134	label_in_text = 0;
135	++opt;
136	if (*opt == '\0') {
137	  annotation_field = 'X';
138	  annotation_macro = "AP";
139	}
140	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
141	  annotation_field = opt[0];
142	  annotation_macro = opt + 2;
143	}
144	opt = 0;
145	break;
146      case 'P':
147	move_punctuation = 1;
148	opt++;
149	break;
150      case 'R':
151	recognize_R1_R2 = 0;
152	opt++;
153	break;
154      case 'S':
155	// Not a very useful spec.
156	set_label_spec("(A.n|Q)', '(D.y|D)");
157	done_spec = 1;
158	pre_label = " (";
159	post_label = ")";
160	sep_label = "; ";
161	opt++;
162	break;
163      case 'V':
164	verify_flag = 1;
165	opt++;
166	break;
167      case 'f':
168	{
169	  const char *num = 0;
170	  if (*++opt == '\0') {
171	    if (argc > 1) {
172	      num = *++argv;
173	      --argc;
174	    }
175	    else {
176	      error("option `f' requires an argument");
177	      usage(stderr);
178	      exit(1);
179	    }
180	  }
181	  else {
182	    num = opt;
183	    opt = 0;
184	  }
185	  const char *ptr;
186	  for (ptr = num; *ptr; ptr++)
187	    if (!csdigit(*ptr)) {
188	      error("bad character `%1' in argument to -f option", *ptr);
189	      break;
190	    }
191	  if (*ptr == '\0') {
192	    string spec;
193	    spec = '%';
194	    spec += num;
195	    spec += '\0';
196	    set_label_spec(spec.contents());
197	    done_spec = 1;
198	  }
199	  break;
200	}
201      case 'b':
202	label_in_text = 0;
203	label_in_reference = 0;
204	opt++;
205	break;
206      case 'e':
207	accumulate = 1;
208	opt++;
209	break;
210      case 'c':
211	capitalize_fields = ++opt;
212	opt = 0;
213	break;
214      case 'k':
215	{
216	  char buf[5];
217	  if (csalpha(*++opt))
218	    buf[0] = *opt++;
219	  else {
220	    if (*opt != '\0')
221	      error("bad field name `%1'", *opt++);
222	    buf[0] = 'L';
223	  }
224	  buf[1] = '~';
225	  buf[2] = '%';
226	  buf[3] = 'a';
227	  buf[4] = '\0';
228	  set_label_spec(buf);
229	  done_spec = 1;
230	}
231	break;
232      case 'a':
233	{
234	  const char *ptr;
235	  for (ptr = ++opt; *ptr; ptr++)
236	    if (!csdigit(*ptr)) {
237	      error("argument to `a' option not a number");
238	      break;
239	    }
240	  if (*ptr == '\0') {
241	    reverse_fields = 'A';
242	    reverse_fields += opt;
243	  }
244	  opt = 0;
245	}
246	break;
247      case 'i':
248	linear_ignore_fields = ++opt;
249	opt = 0;
250	break;
251      case 'l':
252	{
253	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
254	  strcpy(buf, "A.n");
255	  if (*++opt != '\0' && *opt != ',') {
256	    char *ptr;
257	    long n = strtol(opt, &ptr, 10);
258	    if (n == 0 && ptr == opt) {
259	      error("bad integer `%1' in `l' option", opt);
260	      opt = 0;
261	      break;
262	    }
263	    if (n < 0)
264	      n = 0;
265	    opt = ptr;
266	    sprintf(strchr(buf, '\0'), "+%ld", n);
267	  }
268	  strcat(buf, "D.y");
269	  if (*opt == ',')
270	    opt++;
271	  if (*opt != '\0') {
272	    char *ptr;
273	    long n = strtol(opt, &ptr, 10);
274	    if (n == 0 && ptr == opt) {
275	      error("bad integer `%1' in `l' option", opt);
276	      opt = 0;
277	      break;
278	    }
279	    if (n < 0)
280	      n = 0;
281	    sprintf(strchr(buf, '\0'), "-%ld", n);
282	    opt = ptr;
283	    if (*opt != '\0')
284	      error("argument to `l' option not of form `m,n'");
285	  }
286	  strcat(buf, "%a");
287	  if (!set_label_spec(buf))
288	    assert(0);
289	  done_spec = 1;
290	}
291	break;
292      case 'n':
293	search_default = 0;
294	opt++;
295	break;
296      case 'p':
297	{
298	  const char *filename = 0;
299	  if (*++opt == '\0') {
300	    if (argc > 1) {
301	      filename = *++argv;
302	      argc--;
303	    }
304	    else {
305	      error("option `p' requires an argument");
306	      usage(stderr);
307	      exit(1);
308	    }
309	  }
310	  else {
311	    filename = opt;
312	    opt = 0;
313	  }
314	  database_list.add_file(filename);
315	}
316	break;
317      case 's':
318	if (*++opt == '\0')
319	  sort_fields = "AD";
320	else {
321	  sort_fields = opt;
322	  opt = 0;
323	}
324	accumulate = 1;
325	break;
326      case 't':
327	{
328	  char *ptr;
329	  long n = strtol(opt, &ptr, 10);
330	  if (n == 0 && ptr == opt) {
331	    error("bad integer `%1' in `t' option", opt);
332	    opt = 0;
333	    break;
334	  }
335	  if (n < 1)
336	    n = 1;
337	  linear_truncate_len = int(n);
338	  opt = ptr;
339	  break;
340	}
341      case '-':
342	if (opt[1] == '\0') {
343	  finished_options = 1;
344	  opt++;
345	  break;
346	}
347	if (strcmp(opt,"-version")==0) {
348      case 'v':
349	  printf("GNU refer (groff) version %s\n", Version_string);
350	  exit(0);
351	  break;
352	}
353	if (strcmp(opt,"-help")==0) {
354	  usage(stdout);
355	  exit(0);
356	  break;
357	}
358	// fall through
359      default:
360	error("unrecognized option `%1'", *opt);
361	usage(stderr);
362	exit(1);
363	break;
364      }
365    }
366  }
367  if (!done_spec)
368    set_label_spec("%1");
369  if (argc <= 0) {
370    if (bib_flag)
371      do_bib("-");
372    else
373      do_file("-");
374  }
375  else {
376    for (int i = 0; i < argc; i++) {
377      if (bib_flag)
378	do_bib(argv[i]);
379      else
380	do_file(argv[i]);
381    }
382  }
383  if (accumulate)
384    output_references();
385  if (fflush(stdout) < 0)
386    fatal("output error");
387  return 0;
388}
389
390static void usage(FILE *stream)
391{
392  fprintf(stream,
393"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
394"       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
395	  program_name);
396}
397
398static void possibly_load_default_database()
399{
400  if (search_default && !default_database_loaded) {
401    char *filename = getenv("REFER");
402    if (filename)
403      database_list.add_file(filename);
404    else
405      database_list.add_file(DEFAULT_INDEX, 1);
406    default_database_loaded = 1;
407  }
408}
409
410static int is_list(const string &str)
411{
412  const char *start = str.contents();
413  const char *end = start + str.length();
414  while (end > start && csspace(end[-1]))
415    end--;
416  while (start < end && csspace(*start))
417    start++;
418  return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
419}
420
421static void do_file(const char *filename)
422{
423  FILE *fp;
424  if (strcmp(filename, "-") == 0) {
425    fp = stdin;
426  }
427  else {
428    errno = 0;
429    fp = fopen(filename, "r");
430    if (fp == 0) {
431      error("can't open `%1': %2", filename, strerror(errno));
432      return;
433    }
434  }
435  current_filename = filename;
436  fprintf(outfp, ".lf 1 %s\n", filename);
437  string line;
438  current_lineno = 0;
439  for (;;) {
440    line.clear();
441    for (;;) {
442      int c = getc(fp);
443      if (c == EOF) {
444	if (line.length() > 0)
445	  line += '\n';
446	break;
447      }
448      if (invalid_input_char(c))
449	error("invalid input character code %1", c);
450      else {
451	line += c;
452	if (c == '\n')
453	  break;
454      }
455    }
456    int len = line.length();
457    if (len == 0)
458      break;
459    current_lineno++;
460    if (len >= 2 && line[0] == '.' && line[1] == '[') {
461      int start_lineno = current_lineno;
462      int start_of_line = 1;
463      string str;
464      string post;
465      string pre(line.contents() + 2, line.length() - 3);
466      for (;;) {
467	int c = getc(fp);
468	if (c == EOF) {
469	  error_with_file_and_line(current_filename, start_lineno,
470				   "missing `.]' line");
471	  break;
472	}
473	if (start_of_line)
474	  current_lineno++;
475	if (start_of_line && c == '.') {
476	  int d = getc(fp);
477	  if (d == ']') {
478	    while ((d = getc(fp)) != '\n' && d != EOF) {
479	      if (invalid_input_char(d))
480		error("invalid input character code %1", d);
481	      else
482		post += d;
483	    }
484	    break;
485	  }
486	  if (d != EOF)
487	    ungetc(d, fp);
488	}
489	if (invalid_input_char(c))
490	  error("invalid input character code %1", c);
491	else
492	  str += c;
493	start_of_line = (c == '\n');
494      }
495      if (is_list(str)) {
496	output_pending_line();
497	if (accumulate)
498	  output_references();
499	else
500	  error("found `$LIST$' but not accumulating references");
501      }
502      else {
503	unsigned flags = (accumulate
504			  ? store_reference(str)
505			  : immediately_handle_reference(str));
506	if (label_in_text) {
507	  if (accumulate && outfp == stdout)
508	    divert_to_temporary_file();
509	  if (pending_line.length() == 0) {
510	    warning("can't attach citation to previous line");
511	  }
512	  else
513	    pending_line.set_length(pending_line.length() - 1);
514	  string punct;
515	  if (move_punctuation)
516	    split_punct(pending_line, punct);
517	  int have_text = pre.length() > 0 || post.length() > 0;
518	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
519					       |FORCE_RIGHT_BRACKET));
520	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
521	    pending_line += PRE_LABEL_MARKER;
522	  pending_line += pre;
523	  char lm = LABEL_MARKER + (int)lt;
524	  pending_line += lm;
525	  pending_line += post;
526	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
527	    pending_line += POST_LABEL_MARKER;
528	  pending_line += punct;
529	  pending_line += '\n';
530	}
531      }
532      need_syncing = 1;
533    }
534    else if (len >= 4
535	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
536	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
537      pending_lf_lines += line;
538      line += '\0';
539      if (interpret_lf_args(line.contents() + 3))
540	current_lineno--;
541    }
542    else if (recognize_R1_R2
543	     && len >= 4
544	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
545	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
546      line.clear();
547      int start_of_line = 1;
548      int start_lineno = current_lineno;
549      for (;;) {
550	int c = getc(fp);
551	if (c != EOF && start_of_line)
552	  current_lineno++;
553	if (start_of_line && c == '.') {
554	  c = getc(fp);
555	  if (c == 'R') {
556	    c = getc(fp);
557	    if (c == '2') {
558	      c = getc(fp);
559	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
560		while (c != EOF && c != '\n')
561		  c = getc(fp);
562		break;
563	      }
564	      else {
565		line += '.';
566		line += 'R';
567		line += '2';
568	      }
569	    }
570	    else {
571	      line += '.';
572	      line += 'R';
573	    }
574	  }
575	  else
576	    line += '.';
577	}
578	if (c == EOF) {
579	  error_with_file_and_line(current_filename, start_lineno,
580				   "missing `.R2' line");
581	  break;
582	}
583	if (invalid_input_char(c))
584	  error("invalid input character code %1", int(c));
585	else {
586	  line += c;
587	  start_of_line = c == '\n';
588	}
589      }
590      output_pending_line();
591      if (accumulate)
592	output_references();
593      else
594	nreferences = 0;
595      process_commands(line, current_filename, start_lineno + 1);
596      need_syncing = 1;
597    }
598    else {
599      output_pending_line();
600      pending_line = line;
601    }
602  }
603  need_syncing = 0;
604  output_pending_line();
605  if (fp != stdin)
606    fclose(fp);
607}
608
// Filter that copies characters to a stream, replacing the marker
// characters PRE_LABEL_MARKER, POST_LABEL_MARKER and LABEL_MARKER+type
// with bracket text and citation labels.  Label markers separated only
// by a POST/PRE marker pair are collected and written as one group by
// output_citation_group().
class label_processing_state {
  enum {
    NORMAL,			// nothing is pending
    PENDING_LABEL,		// `count' label markers seen, not yet written
    PENDING_LABEL_POST,		// pending labels followed by a POST marker
    PENDING_LABEL_POST_PRE,	// ... followed by a POST and a PRE marker
    PENDING_POST		// POST marker seen with no label pending
    } state;
  label_type type;		// type of pending labels
  int count;			// number of pending labels
  reference **rptr;		// pointer to next reference
  int rcount;			// number of references left
  FILE *fp;			// stream the processed text is written to
  // Resolve the pending state against the next character `c'.  Returns
  // non-zero if `c' was consumed.
  int handle_pending(int c);
public:
  // Arguments: the references to use for the labels, in order; how many
  // there are; and the output stream.
  label_processing_state(reference **, int, FILE *);
  // Writes out anything still pending.  Asserts that every reference
  // was used.
  ~label_processing_state();
  // Feed one character of input.
  void process(int c);
};
628
629static void output_pending_line()
630{
631  if (label_in_text && !accumulate && ncitations > 0) {
632    label_processing_state state(citation, ncitations, outfp);
633    int len = pending_line.length();
634    for (int i = 0; i < len; i++)
635      state.process((unsigned char)(pending_line[i]));
636  }
637  else
638    put_string(pending_line, outfp);
639  pending_line.clear();
640  if (pending_lf_lines.length() > 0) {
641    put_string(pending_lf_lines, outfp);
642    pending_lf_lines.clear();
643  }
644  if (!accumulate)
645    immediately_output_references();
646  if (need_syncing) {
647    fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
648    need_syncing = 0;
649  }
650}
651
652static void split_punct(string &line, string &punct)
653{
654  const char *start = line.contents();
655  const char *end = start + line.length();
656  const char *ptr = start;
657  const char *last_token_start = 0;
658  for (;;) {
659    if (ptr >= end)
660      break;
661    last_token_start = ptr;
662    if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
663	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
664      ptr++;
665    else if (!get_token(&ptr, end))
666      break;
667  }
668  if (last_token_start) {
669    const token_info *ti = lookup_token(last_token_start, end);
670    if (ti->is_punct()) {
671      punct.append(last_token_start, end - last_token_start);
672      line.set_length(last_token_start - start);
673    }
674  }
675}
676
677static void divert_to_temporary_file()
678{
679  outfp = xtmpfile();
680}
681
682static void store_citation(reference *ref)
683{
684  if (ncitations >= citation_max) {
685    if (citation == 0)
686      citation = new reference*[citation_max = 100];
687    else {
688      reference **old_citation = citation;
689      citation_max *= 2;
690      citation = new reference *[citation_max];
691      memcpy(citation, old_citation, ncitations*sizeof(reference *));
692      a_delete old_citation;
693    }
694  }
695  citation[ncitations++] = ref;
696}
697
698static unsigned store_reference(const string &str)
699{
700  if (reference_hash_table == 0) {
701    reference_hash_table = new reference *[17];
702    hash_table_size = 17;
703    for (int i = 0; i < hash_table_size; i++)
704      reference_hash_table[i] = 0;
705  }
706  unsigned flags;
707  reference *ref = make_reference(str, &flags);
708  ref->compute_hash_code();
709  unsigned h = ref->hash();
710  reference **ptr;
711  for (ptr = reference_hash_table + (h % hash_table_size);
712       *ptr != 0;
713       ((ptr == reference_hash_table)
714	? (ptr = reference_hash_table + hash_table_size - 1)
715	: --ptr))
716    if (same_reference(**ptr, *ref))
717      break;
718  if (*ptr != 0) {
719    if (ref->is_merged())
720      warning("fields ignored because reference already used");
721    delete ref;
722    ref = *ptr;
723  }
724  else {
725    *ptr = ref;
726    ref->set_number(nreferences);
727    nreferences++;
728    ref->pre_compute_label();
729    ref->compute_sort_key();
730    if (nreferences*2 >= hash_table_size) {
731      // Rehash it.
732      reference **old_table = reference_hash_table;
733      int old_size = hash_table_size;
734      hash_table_size = next_size(hash_table_size);
735      reference_hash_table = new reference*[hash_table_size];
736      int i;
737      for (i = 0; i < hash_table_size; i++)
738	reference_hash_table[i] = 0;
739      for (i = 0; i < old_size; i++)
740	if (old_table[i]) {
741	  reference **p;
742	  for (p = (reference_hash_table
743				+ (old_table[i]->hash() % hash_table_size));
744	       *p;
745	       ((p == reference_hash_table)
746		? (p = reference_hash_table + hash_table_size - 1)
747		: --p))
748	    ;
749	  *p = old_table[i];
750	}
751      a_delete old_table;
752    }
753  }
754  if (label_in_text)
755    store_citation(ref);
756  return flags;
757}
758
759unsigned immediately_handle_reference(const string &str)
760{
761  unsigned flags;
762  reference *ref = make_reference(str, &flags);
763  ref->set_number(nreferences);
764  if (label_in_text || label_in_reference) {
765    ref->pre_compute_label();
766    ref->immediate_compute_label();
767  }
768  nreferences++;
769  store_citation(ref);
770  return flags;
771}
772
773static void immediately_output_references()
774{
775  for (int i = 0; i < ncitations; i++) {
776    reference *ref = citation[i];
777    if (label_in_reference) {
778      fputs(".ds [F ", outfp);
779      const string &label = ref->get_label(NORMAL_LABEL);
780      if (label.length() > 0
781	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
782	putc('"', outfp);
783      put_string(label, outfp);
784      putc('\n', outfp);
785    }
786    ref->output(outfp);
787    delete ref;
788  }
789  ncitations = 0;
790}
791
792static void output_citation_group(reference **v, int n, label_type type,
793				  FILE *fp)
794{
795  if (sort_adjacent_labels) {
796    // Do an insertion sort.  Usually n will be very small.
797    for (int i = 1; i < n; i++) {
798      int num = v[i]->get_number();
799      reference *temp = v[i];
800      int j;
801      for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
802	v[j + 1] = v[j];
803      v[j + 1] = temp;
804    }
805  }
806  // This messes up if !accumulate.
807  if (accumulate && n > 1) {
808    // remove duplicates
809    int j = 1;
810    for (int i = 1; i < n; i++)
811      if (v[i]->get_label(type) != v[i - 1]->get_label(type))
812	v[j++] = v[i];
813    n = j;
814  }
815  string merged_label;
816  for (int i = 0; i < n; i++) {
817    int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
818    if (nmerged > 0) {
819      put_string(merged_label, fp);
820      i += nmerged;
821    }
822    else
823      put_string(v[i]->get_label(type), fp);
824    if (i < n - 1)
825      put_string(sep_label, fp);
826  }
827}
828
829
830label_processing_state::label_processing_state(reference **p, int n, FILE *f)
831: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
832{
833}
834
835label_processing_state::~label_processing_state()
836{
837  int handled = handle_pending(EOF);
838  assert(!handled);
839  assert(rcount == 0);
840}
841
842int label_processing_state::handle_pending(int c)
843{
844  switch (state) {
845  case NORMAL:
846    break;
847  case PENDING_LABEL:
848    if (c == POST_LABEL_MARKER) {
849      state = PENDING_LABEL_POST;
850      return 1;
851    }
852    else {
853      output_citation_group(rptr, count, type, fp);
854      rptr += count ;
855      rcount -= count;
856      state = NORMAL;
857    }
858    break;
859  case PENDING_LABEL_POST:
860    if (c == PRE_LABEL_MARKER) {
861      state = PENDING_LABEL_POST_PRE;
862      return 1;
863    }
864    else {
865      output_citation_group(rptr, count, type, fp);
866      rptr += count;
867      rcount -= count;
868      put_string(post_label, fp);
869      state = NORMAL;
870    }
871    break;
872  case PENDING_LABEL_POST_PRE:
873    if (c >= LABEL_MARKER
874	&& c < LABEL_MARKER + N_LABEL_TYPES
875	&& c - LABEL_MARKER == type) {
876      count += 1;
877      state = PENDING_LABEL;
878      return 1;
879    }
880    else {
881      output_citation_group(rptr, count, type, fp);
882      rptr += count;
883      rcount -= count;
884      put_string(sep_label, fp);
885      state = NORMAL;
886    }
887    break;
888  case PENDING_POST:
889    if (c == PRE_LABEL_MARKER) {
890      put_string(sep_label, fp);
891      state = NORMAL;
892      return 1;
893    }
894    else {
895      put_string(post_label, fp);
896      state = NORMAL;
897    }
898    break;
899  }
900  return 0;
901}
902
903void label_processing_state::process(int c)
904{
905  if (handle_pending(c))
906    return;
907  assert(state == NORMAL);
908  switch (c) {
909  case PRE_LABEL_MARKER:
910    put_string(pre_label, fp);
911    state = NORMAL;
912    break;
913  case POST_LABEL_MARKER:
914    state = PENDING_POST;
915    break;
916  case LABEL_MARKER:
917  case LABEL_MARKER + 1:
918    count = 1;
919    state = PENDING_LABEL;
920    type = label_type(c - LABEL_MARKER);
921    break;
922  default:
923    state = NORMAL;
924    putc(c, fp);
925    break;
926  }
927}
928
929extern "C" {
930
931int rcompare(const void *p1, const void *p2)
932{
933  return compare_reference(**(reference **)p1, **(reference **)p2);
934}
935
936}
937
938void output_references()
939{
940  assert(accumulate);
941  if (nreferences > 0) {
942    int j = 0;
943    int i;
944    for (i = 0; i < hash_table_size; i++)
945      if (reference_hash_table[i] != 0)
946	reference_hash_table[j++] = reference_hash_table[i];
947    assert(j == nreferences);
948    for (; j < hash_table_size; j++)
949      reference_hash_table[j] = 0;
950    qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
951    for (i = 0; i < nreferences; i++)
952      reference_hash_table[i]->set_number(i);
953    compute_labels(reference_hash_table, nreferences);
954  }
955  if (outfp != stdout) {
956    rewind(outfp);
957    {
958      label_processing_state state(citation, ncitations, stdout);
959      int c;
960      while ((c = getc(outfp)) != EOF)
961	state.process(c);
962    }
963    ncitations = 0;
964    fclose(outfp);
965    outfp = stdout;
966  }
967  if (nreferences > 0) {
968    fputs(".]<\n", outfp);
969    for (int i = 0; i < nreferences; i++) {
970      if (sort_fields.length() > 0)
971	reference_hash_table[i]->print_sort_key_comment(outfp);
972      if (label_in_reference) {
973	fputs(".ds [F ", outfp);
974	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
975	if (label.length() > 0
976	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
977	  putc('"', outfp);
978	put_string(label, outfp);
979	putc('\n', outfp);
980      }
981      reference_hash_table[i]->output(outfp);
982      delete reference_hash_table[i];
983      reference_hash_table[i] = 0;
984    }
985    fputs(".]>\n", outfp);
986    nreferences = 0;
987  }
988  clear_labels();
989}
990
991static reference *find_reference(const char *query, int query_len)
992{
993  // This is so that error messages look better.
994  while (query_len > 0 && csspace(query[query_len - 1]))
995    query_len--;
996  string str;
997  for (int i = 0; i < query_len; i++)
998    str += query[i] == '\n' ? ' ' : query[i];
999  str += '\0';
1000  possibly_load_default_database();
1001  search_list_iterator iter(&database_list, str.contents());
1002  reference_id rid;
1003  const char *start;
1004  int len;
1005  if (!iter.next(&start, &len, &rid)) {
1006    error("no matches for `%1'", str.contents());
1007    return 0;
1008  }
1009  const char *end = start + len;
1010  while (start < end) {
1011    if (*start == '%')
1012      break;
1013    while (start < end && *start++ != '\n')
1014      ;
1015  }
1016  if (start >= end) {
1017    error("found a reference for `%1' but it didn't contain any fields",
1018	  str.contents());
1019    return 0;
1020  }
1021  reference *result = new reference(start, end - start, &rid);
1022  if (iter.next(&start, &len, &rid))
1023    warning("multiple matches for `%1'", str.contents());
1024  return result;
1025}
1026
1027static reference *make_reference(const string &str, unsigned *flagsp)
1028{
1029  const char *start = str.contents();
1030  const char *end = start + str.length();
1031  const char *ptr = start;
1032  while (ptr < end) {
1033    if (*ptr == '%')
1034      break;
1035    while (ptr < end && *ptr++ != '\n')
1036      ;
1037  }
1038  *flagsp = 0;
1039  for (; start < ptr; start++) {
1040    if (*start == '#')
1041      *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1042					   | FORCE_LEFT_BRACKET)));
1043    else if (*start == '[')
1044      *flagsp |= FORCE_LEFT_BRACKET;
1045    else if (*start == ']')
1046      *flagsp |= FORCE_RIGHT_BRACKET;
1047    else if (!csspace(*start))
1048      break;
1049  }
1050  if (start >= end) {
1051    error("empty reference");
1052    return new reference;
1053  }
1054  reference *database_ref = 0;
1055  if (start < ptr)
1056    database_ref = find_reference(start, ptr - start);
1057  reference *inline_ref = 0;
1058  if (ptr < end)
1059    inline_ref = new reference(ptr, end - ptr);
1060  if (inline_ref) {
1061    if (database_ref) {
1062      database_ref->merge(*inline_ref);
1063      delete inline_ref;
1064      return database_ref;
1065    }
1066    else
1067      return inline_ref;
1068  }
1069  else if (database_ref)
1070    return database_ref;
1071  else
1072    return new reference;
1073}
1074
1075static void do_ref(const string &str)
1076{
1077  if (accumulate)
1078    (void)store_reference(str);
1079  else {
1080    (void)immediately_handle_reference(str);
1081    immediately_output_references();
1082  }
1083}
1084
1085static void trim_blanks(string &str)
1086{
1087  const char *start = str.contents();
1088  const char *end = start + str.length();
1089  while (end > start && end[-1] != '\n' && csspace(end[-1]))
1090    --end;
1091  str.set_length(end - start);
1092}
1093
1094void do_bib(const char *filename)
1095{
1096  FILE *fp;
1097  if (strcmp(filename, "-") == 0)
1098    fp = stdin;
1099  else {
1100    errno = 0;
1101    fp = fopen(filename, "r");
1102    if (fp == 0) {
1103      error("can't open `%1': %2", filename, strerror(errno));
1104      return;
1105    }
1106    current_filename = filename;
1107  }
1108  enum {
1109    START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1110    } state = START;
1111  string body;
1112  for (;;) {
1113    int c = getc(fp);
1114    if (c == EOF)
1115      break;
1116    if (invalid_input_char(c)) {
1117      error("invalid input character code %1", c);
1118      continue;
1119    }
1120    switch (state) {
1121    case START:
1122      if (c == '%') {
1123	body = c;
1124	state = BODY;
1125      }
1126      else if (c != '\n')
1127	state = MIDDLE;
1128      break;
1129    case MIDDLE:
1130      if (c == '\n')
1131	state = START;
1132      break;
1133    case BODY:
1134      body += c;
1135      if (c == '\n')
1136	state = BODY_START;
1137      break;
1138    case BODY_START:
1139      if (c == '\n') {
1140	do_ref(body);
1141	state = START;
1142      }
1143      else if (c == '.')
1144	state = BODY_DOT;
1145      else if (csspace(c)) {
1146	state = BODY_BLANK;
1147	body += c;
1148      }
1149      else {
1150	body += c;
1151	state = BODY;
1152      }
1153      break;
1154    case BODY_BLANK:
1155      if (c == '\n') {
1156	trim_blanks(body);
1157	do_ref(body);
1158	state = START;
1159      }
1160      else if (csspace(c))
1161	body += c;
1162      else {
1163	body += c;
1164	state = BODY;
1165      }
1166      break;
1167    case BODY_DOT:
1168      if (c == ']') {
1169	do_ref(body);
1170	state = MIDDLE;
1171      }
1172      else {
1173	body += '.';
1174	body += c;
1175	state = c == '\n' ? BODY_START : BODY;
1176      }
1177      break;
1178    default:
1179      assert(0);
1180    }
1181    if (c == '\n')
1182      current_lineno++;
1183  }
1184  switch (state) {
1185  case START:
1186  case MIDDLE:
1187    break;
1188  case BODY:
1189    body += '\n';
1190    do_ref(body);
1191    break;
1192  case BODY_DOT:
1193  case BODY_START:
1194    do_ref(body);
1195    break;
1196  case BODY_BLANK:
1197    trim_blanks(body);
1198    do_ref(body);
1199    break;
1200  }
1201  fclose(fp);
1202}
1203
// from the Dragon Book

// Hash the first `len' characters of `s'.  Each character is added
// after shifting the running value left by four bits.  Whenever any of
// the top four bits are set, they are folded back into the low bits and
// cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned hash = 0;
  for (int i = 0; i < len; i++) {
    hash = (hash << 4) + s[i];
    const unsigned top = hash & 0xf0000000;
    if (top != 0) {
      hash ^= top >> 24;
      hash ^= top;
    }
  }
  return hash;
}
1220
// Return the smallest entry of the table of sizes below that is greater
// than `n'.  Asserts that such an entry exists.
int next_size(int n)
{
  // The list ends with a zero.
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  int idx = 0;
  while (table_sizes[idx] != 0 && table_sizes[idx] <= n)
    idx++;
  assert(table_sizes[idx] != 0);
  return table_sizes[idx];
}
1235
1236