1// -*- C++ -*-
2/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2004
3   Free Software Foundation, Inc.
4     Written by James Clark (jjc@jclark.com)
5
6This file is part of groff.
7
8groff is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13groff is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License along
19with groff; see the file COPYING.  If not, write to the Free Software
20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
21
22#include "refer.h"
23#include "refid.h"
24#include "search.h"
25#include "command.h"
26
27cset cs_field_name = csalpha;
28
29class input_item {
30  input_item *next;
31  char *filename;
32  int first_lineno;
33  string buffer;
34  const char *ptr;
35  const char *end;
36public:
37  input_item(string &, const char *, int = 1);
38  ~input_item();
39  int get_char();
40  int peek_char();
41  void skip_char();
42  int get_location(const char **, int *);
43
44  friend class input_stack;
45};
46
47input_item::input_item(string &s, const char *fn, int ln)
48: filename(strsave(fn)), first_lineno(ln)
49{
50  buffer.move(s);
51  ptr = buffer.contents();
52  end = ptr + buffer.length();
53}
54
55input_item::~input_item()
56{
57  a_delete filename;
58}
59
60inline int input_item::peek_char()
61{
62  if (ptr >= end)
63    return EOF;
64  else
65    return (unsigned char)*ptr;
66}
67
68inline int input_item::get_char()
69{
70  if (ptr >= end)
71    return EOF;
72  else
73    return (unsigned char)*ptr++;
74}
75
76inline void input_item::skip_char()
77{
78  ptr++;
79}
80
81int input_item::get_location(const char **filenamep, int *linenop)
82{
83  *filenamep = filename;
84  if (ptr == buffer.contents())
85    *linenop = first_lineno;
86  else {
87    int ln = first_lineno;
88    const char *e = ptr - 1;
89    for (const char *p = buffer.contents(); p < e; p++)
90      if (*p == '\n')
91	ln++;
92    *linenop = ln;
93  }
94  return 1;
95}
96
97class input_stack {
98  static input_item *top;
99public:
100  static void init();
101  static int get_char();
102  static int peek_char();
103  static void skip_char() { top->skip_char(); }
104  static void push_file(const char *);
105  static void push_string(string &, const char *, int);
106  static void error(const char *format,
107		    const errarg &arg1 = empty_errarg,
108		    const errarg &arg2 = empty_errarg,
109		    const errarg &arg3 = empty_errarg);
110};
111
112input_item *input_stack::top = 0;
113
114void input_stack::init()
115{
116  while (top) {
117    input_item *tem = top;
118    top = top->next;
119    delete tem;
120  }
121}
122
123int input_stack::get_char()
124{
125  while (top) {
126    int c = top->get_char();
127    if (c >= 0)
128      return c;
129    input_item *tem = top;
130    top = top->next;
131    delete tem;
132  }
133  return -1;
134}
135
136int input_stack::peek_char()
137{
138  while (top) {
139    int c = top->peek_char();
140    if (c >= 0)
141      return c;
142    input_item *tem = top;
143    top = top->next;
144    delete tem;
145  }
146  return -1;
147}
148
149void input_stack::push_file(const char *fn)
150{
151  FILE *fp;
152  if (strcmp(fn, "-") == 0) {
153    fp = stdin;
154    fn = "<standard input>";
155  }
156  else {
157    errno = 0;
158    fp = fopen(fn, "r");
159    if (fp == 0) {
160      error("can't open `%1': %2", fn, strerror(errno));
161      return;
162    }
163  }
164  string buf;
165  int bol = 1;
166  int lineno = 1;
167  for (;;) {
168    int c = getc(fp);
169    if (bol && c == '.') {
170      // replace lines beginning with .R1 or .R2 with a blank line
171      c = getc(fp);
172      if (c == 'R') {
173	c = getc(fp);
174	if (c == '1' || c == '2') {
175	  int cc = c;
176	  c = getc(fp);
177	  if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
178	    while (c != '\n' && c != EOF)
179	      c = getc(fp);
180	  }
181	  else {
182	    buf += '.';
183	    buf += 'R';
184	    buf += cc;
185	  }
186	}
187	else {
188	  buf += '.';
189	  buf += 'R';
190	}
191      }
192      else
193	buf += '.';
194    }
195    if (c == EOF)
196      break;
197    if (invalid_input_char(c))
198      error_with_file_and_line(fn, lineno,
199			       "invalid input character code %1", int(c));
200    else {
201      buf += c;
202      if (c == '\n') {
203	bol = 1;
204	lineno++;
205      }
206      else
207	bol = 0;
208    }
209  }
210  if (fp != stdin)
211    fclose(fp);
212  if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
213    buf += '\n';
214  input_item *it = new input_item(buf, fn);
215  it->next = top;
216  top = it;
217}
218
219void input_stack::push_string(string &s, const char *filename, int lineno)
220{
221  input_item *it = new input_item(s, filename, lineno);
222  it->next = top;
223  top = it;
224}
225
226void input_stack::error(const char *format, const errarg &arg1,
227			const errarg &arg2, const errarg &arg3)
228{
229  const char *filename;
230  int lineno;
231  for (input_item *it = top; it; it = it->next)
232    if (it->get_location(&filename, &lineno)) {
233      error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
234      return;
235    }
236  ::error(format, arg1, arg2, arg3);
237}
238
239void command_error(const char *format, const errarg &arg1,
240		   const errarg &arg2, const errarg &arg3)
241{
242  input_stack::error(format, arg1, arg2, arg3);
243}
244
245// # not recognized in ""
246// \<newline> is recognized in ""
247// # does not conceal newline
248// if missing closing quote, word extends to end of line
249// no special treatment of \ other than before newline
250// \<newline> not recognized after #
251// ; allowed as alternative to newline
252// ; not recognized in ""
253// don't clear word_buffer; just append on
254// return -1 for EOF, 0 for newline, 1 for word
255
256int get_word(string &word_buffer)
257{
258  int c = input_stack::get_char();
259  for (;;) {
260    if (c == '#') {
261      do {
262	c = input_stack::get_char();
263      } while (c != '\n' && c != EOF);
264      break;
265    }
266    if (c == '\\' && input_stack::peek_char() == '\n')
267      input_stack::skip_char();
268    else if (c != ' ' && c != '\t')
269      break;
270    c = input_stack::get_char();
271  }
272  if (c == EOF)
273    return -1;
274  if (c == '\n' || c == ';')
275    return 0;
276  if (c == '"') {
277    for (;;) {
278      c = input_stack::peek_char();
279      if (c == EOF || c == '\n')
280	break;
281      input_stack::skip_char();
282      if (c == '"') {
283	int d = input_stack::peek_char();
284	if (d == '"')
285	  input_stack::skip_char();
286	else
287	  break;
288      }
289      else if (c == '\\') {
290	int d = input_stack::peek_char();
291	if (d == '\n')
292	  input_stack::skip_char();
293	else
294	  word_buffer += '\\';
295      }
296      else
297	word_buffer += c;
298    }
299    return 1;
300  }
301  word_buffer += c;
302  for (;;) {
303    c = input_stack::peek_char();
304    if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';')
305      break;
306    input_stack::skip_char();
307    if (c == '\\') {
308      int d = input_stack::peek_char();
309      if (d == '\n')
310	input_stack::skip_char();
311      else
312	word_buffer += '\\';
313    }
314    else
315      word_buffer += c;
316  }
317  return 1;
318}
319
// A single command argument as handed to the command functions.  It
// starts out as a string (`s'); check_args() overwrites it with the
// parsed value (`n') for arguments declared as integers.
union argument {
  const char *s;	// textual form
  int n;		// numeric form
};
324
325// This is for debugging.
326
327static void echo_command(int argc, argument *argv)
328{
329  for (int i = 0; i < argc; i++)
330    fprintf(stderr, "%s\n", argv[i].s);
331}
332
333static void include_command(int argc, argument *argv)
334{
335  assert(argc == 1);
336  input_stack::push_file(argv[0].s);
337}
338
339static void capitalize_command(int argc, argument *argv)
340{
341  if (argc > 0)
342    capitalize_fields = argv[0].s;
343  else
344    capitalize_fields.clear();
345}
346
347static void accumulate_command(int, argument *)
348{
349  accumulate = 1;
350}
351
352static void no_accumulate_command(int, argument *)
353{
354  accumulate = 0;
355}
356
357static void move_punctuation_command(int, argument *)
358{
359  move_punctuation = 1;
360}
361
362static void no_move_punctuation_command(int, argument *)
363{
364  move_punctuation = 0;
365}
366
367static void sort_command(int argc, argument *argv)
368{
369  if (argc == 0)
370    sort_fields = "AD";
371  else
372    sort_fields = argv[0].s;
373  accumulate = 1;
374}
375
376static void no_sort_command(int, argument *)
377{
378  sort_fields.clear();
379}
380
381static void articles_command(int argc, argument *argv)
382{
383  articles.clear();
384  int i;
385  for (i = 0; i < argc; i++) {
386    articles += argv[i].s;
387    articles += '\0';
388  }
389  int len = articles.length();
390  for (i = 0; i < len; i++)
391    articles[i] = cmlower(articles[i]);
392}
393
394static void database_command(int argc, argument *argv)
395{
396  for (int i = 0; i < argc; i++)
397    database_list.add_file(argv[i].s);
398}
399
400static void default_database_command(int, argument *)
401{
402  search_default = 1;
403}
404
405static void no_default_database_command(int, argument *)
406{
407  search_default = 0;
408}
409
410static void bibliography_command(int argc, argument *argv)
411{
412  const char *saved_filename = current_filename;
413  int saved_lineno = current_lineno;
414  int saved_label_in_text = label_in_text;
415  label_in_text = 0;
416  if (!accumulate)
417    fputs(".]<\n", stdout);
418  for (int i = 0; i < argc; i++)
419    do_bib(argv[i].s);
420  if (accumulate)
421    output_references();
422  else
423    fputs(".]>\n", stdout);
424  current_filename = saved_filename;
425  current_lineno = saved_lineno;
426  label_in_text = saved_label_in_text;
427}
428
429static void annotate_command(int argc, argument *argv)
430{
431  if (argc > 0)
432    annotation_field = argv[0].s[0];
433  else
434    annotation_field = 'X';
435  if (argc == 2)
436    annotation_macro = argv[1].s;
437  else
438    annotation_macro = "AP";
439}
440
441static void no_annotate_command(int, argument *)
442{
443  annotation_macro.clear();
444  annotation_field = -1;
445}
446
447static void reverse_command(int, argument *argv)
448{
449  reverse_fields = argv[0].s;
450}
451
452static void no_reverse_command(int, argument *)
453{
454  reverse_fields.clear();
455}
456
457static void abbreviate_command(int argc, argument *argv)
458{
459  abbreviate_fields = argv[0].s;
460  period_before_initial = argc > 1 ? argv[1].s : ". ";
461  period_before_last_name = argc > 2 ? argv[2].s : ". ";
462  period_before_other = argc > 3 ? argv[3].s : ". ";
463  period_before_hyphen = argc > 4 ? argv[4].s : ".";
464}
465
466static void no_abbreviate_command(int, argument *)
467{
468  abbreviate_fields.clear();
469}
470
471string search_ignore_fields;
472
473static void search_ignore_command(int argc, argument *argv)
474{
475  if (argc > 0)
476    search_ignore_fields = argv[0].s;
477  else
478    search_ignore_fields = "XYZ";
479  search_ignore_fields += '\0';
480  linear_ignore_fields = search_ignore_fields.contents();
481}
482
483static void no_search_ignore_command(int, argument *)
484{
485  linear_ignore_fields = "";
486}
487
488static void search_truncate_command(int argc, argument *argv)
489{
490  if (argc > 0)
491    linear_truncate_len = argv[0].n;
492  else
493    linear_truncate_len = 6;
494}
495
496static void no_search_truncate_command(int, argument *)
497{
498  linear_truncate_len = -1;
499}
500
501static void discard_command(int argc, argument *argv)
502{
503  if (argc == 0)
504    discard_fields = "XYZ";
505  else
506    discard_fields = argv[0].s;
507  accumulate = 1;
508}
509
510static void no_discard_command(int, argument *)
511{
512  discard_fields.clear();
513}
514
515static void label_command(int, argument *argv)
516{
517  set_label_spec(argv[0].s);
518}
519
520static void abbreviate_label_ranges_command(int argc, argument *argv)
521{
522  abbreviate_label_ranges = 1;
523  label_range_indicator = argc > 0 ? argv[0].s : "-";
524}
525
526static void no_abbreviate_label_ranges_command(int, argument *)
527{
528  abbreviate_label_ranges = 0;
529}
530
531static void label_in_reference_command(int, argument *)
532{
533  label_in_reference = 1;
534}
535
536static void no_label_in_reference_command(int, argument *)
537{
538  label_in_reference = 0;
539}
540
541static void label_in_text_command(int, argument *)
542{
543  label_in_text = 1;
544}
545
546static void no_label_in_text_command(int, argument *)
547{
548  label_in_text = 0;
549}
550
551static void sort_adjacent_labels_command(int, argument *)
552{
553  sort_adjacent_labels = 1;
554}
555
556static void no_sort_adjacent_labels_command(int, argument *)
557{
558  sort_adjacent_labels = 0;
559}
560
561static void date_as_label_command(int argc, argument *argv)
562{
563  if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
564    date_as_label = 1;
565}
566
567static void no_date_as_label_command(int, argument *)
568{
569  date_as_label = 0;
570}
571
572static void short_label_command(int, argument *argv)
573{
574  if (set_short_label_spec(argv[0].s))
575    short_label_flag = 1;
576}
577
578static void no_short_label_command(int, argument *)
579{
580  short_label_flag = 0;
581}
582
583static void compatible_command(int, argument *)
584{
585  compatible_flag = 1;
586}
587
588static void no_compatible_command(int, argument *)
589{
590  compatible_flag = 0;
591}
592
593static void join_authors_command(int argc, argument *argv)
594{
595  join_authors_exactly_two = argv[0].s;
596  join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
597  join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
598}
599
600static void bracket_label_command(int, argument *argv)
601{
602  pre_label = argv[0].s;
603  post_label = argv[1].s;
604  sep_label = argv[2].s;
605}
606
607static void separate_label_second_parts_command(int, argument *argv)
608{
609  separate_label_second_parts = argv[0].s;
610}
611
612static void et_al_command(int argc, argument *argv)
613{
614  et_al = argv[0].s;
615  et_al_min_elide = argv[1].n;
616  if (et_al_min_elide < 1)
617    et_al_min_elide = 1;
618  et_al_min_total = argc >= 3 ? argv[2].n : 0;
619}
620
621static void no_et_al_command(int, argument *)
622{
623  et_al.clear();
624  et_al_min_elide = 0;
625}
626
627typedef void (*command_t)(int, argument *);
628
629/* arg_types is a string describing the numbers and types of arguments.
630s means a string, i means an integer, f is a list of fields, F is
631a single field,
632? means that the previous argument is optional, * means that the
633previous argument can occur any number of times. */
634
635struct S {
636  const char *name;
637  command_t func;
638  const char *arg_types;
639} command_table[] = {
640  { "include", include_command, "s" },
641  { "echo", echo_command, "s*" },
642  { "capitalize", capitalize_command, "f?" },
643  { "accumulate", accumulate_command, "" },
644  { "no-accumulate", no_accumulate_command, "" },
645  { "move-punctuation", move_punctuation_command, "" },
646  { "no-move-punctuation", no_move_punctuation_command, "" },
647  { "sort", sort_command, "s?" },
648  { "no-sort", no_sort_command, "" },
649  { "articles", articles_command, "s*" },
650  { "database", database_command, "ss*" },
651  { "default-database", default_database_command, "" },
652  { "no-default-database", no_default_database_command, "" },
653  { "bibliography", bibliography_command, "ss*" },
654  { "annotate", annotate_command, "F?s?" },
655  { "no-annotate", no_annotate_command, "" },
656  { "reverse", reverse_command, "s" },
657  { "no-reverse", no_reverse_command, "" },
658  { "abbreviate", abbreviate_command, "ss?s?s?s?" },
659  { "no-abbreviate", no_abbreviate_command, "" },
660  { "search-ignore", search_ignore_command, "f?" },
661  { "no-search-ignore", no_search_ignore_command, "" },
662  { "search-truncate", search_truncate_command, "i?" },
663  { "no-search-truncate", no_search_truncate_command, "" },
664  { "discard", discard_command, "f?" },
665  { "no-discard", no_discard_command, "" },
666  { "label", label_command, "s" },
667  { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
668  { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
669  { "label-in-reference", label_in_reference_command, "" },
670  { "no-label-in-reference", no_label_in_reference_command, "" },
671  { "label-in-text", label_in_text_command, "" },
672  { "no-label-in-text", no_label_in_text_command, "" },
673  { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
674  { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
675  { "date-as-label", date_as_label_command, "s?" },
676  { "no-date-as-label", no_date_as_label_command, "" },
677  { "short-label", short_label_command, "s" },
678  { "no-short-label", no_short_label_command, "" },
679  { "compatible", compatible_command, "" },
680  { "no-compatible", no_compatible_command, "" },
681  { "join-authors", join_authors_command, "sss?" },
682  { "bracket-label", bracket_label_command, "sss" },
683  { "separate-label-second-parts", separate_label_second_parts_command, "s" },
684  { "et-al", et_al_command, "sii?" },
685  { "no-et-al", no_et_al_command, "" },
686};
687
688static int check_args(const char *types, const char *name,
689		      int argc, argument *argv)
690{
691  int argno = 0;
692  while (*types) {
693    if (argc == 0) {
694      if (types[1] == '?')
695	break;
696      else if (types[1] == '*') {
697	assert(types[2] == '\0');
698	break;
699      }
700      else {
701	input_stack::error("missing argument for command `%1'", name);
702	return 0;
703      }
704    }
705    switch (*types) {
706    case 's':
707      break;
708    case 'i':
709      {
710	char *ptr;
711	long n = strtol(argv->s, &ptr, 10);
712	if ((n == 0 && ptr == argv->s)
713	    || *ptr != '\0') {
714	  input_stack::error("argument %1 for command `%2' must be an integer",
715			     argno + 1, name);
716	  return 0;
717	}
718	argv->n = (int)n;
719	break;
720      }
721    case 'f':
722      {
723	for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
724	  if (!cs_field_name(*ptr)) {
725	    input_stack::error("argument %1 for command `%2' must be a list of fields",
726			     argno + 1, name);
727	    return 0;
728	  }
729	break;
730      }
731    case 'F':
732      if (argv->s[0] == '\0' || argv->s[1] != '\0'
733	  || !cs_field_name(argv->s[0])) {
734	input_stack::error("argument %1 for command `%2' must be a field name",
735			   argno + 1, name);
736	return 0;
737      }
738      break;
739    default:
740      assert(0);
741    }
742    if (types[1] == '?')
743      types += 2;
744    else if (types[1] != '*')
745      types += 1;
746    --argc;
747    ++argv;
748    ++argno;
749  }
750  if (argc > 0) {
751    input_stack::error("too many arguments for command `%1'", name);
752    return 0;
753  }
754  return 1;
755}
756
757static void execute_command(const char *name, int argc, argument *argv)
758{
759  for (unsigned int i = 0;
760       i < sizeof(command_table)/sizeof(command_table[0]); i++)
761    if (strcmp(name, command_table[i].name) == 0) {
762      if (check_args(command_table[i].arg_types, name, argc, argv))
763	(*command_table[i].func)(argc, argv);
764      return;
765    }
766  input_stack::error("unknown command `%1'", name);
767}
768
769static void command_loop()
770{
771  string command;
772  for (;;) {
773    command.clear();
774    int res = get_word(command);
775    if (res != 1) {
776      if (res == 0)
777	continue;
778      break;
779    }
780    int argc = 0;
781    command += '\0';
782    while ((res = get_word(command)) == 1) {
783      argc++;
784      command += '\0';
785    }
786    argument *argv = new argument[argc];
787    const char *ptr = command.contents();
788    for (int i = 0; i < argc; i++)
789      argv[i].s = ptr = strchr(ptr, '\0') + 1;
790    execute_command(command.contents(), argc, argv);
791    a_delete argv;
792    if (res == -1)
793      break;
794  }
795}
796
797void process_commands(const char *file)
798{
799  input_stack::init();
800  input_stack::push_file(file);
801  command_loop();
802}
803
804void process_commands(string &s, const char *file, int lineno)
805{
806  input_stack::init();
807  input_stack::push_string(s, file, lineno);
808  command_loop();
809}
810