1/*	$NetBSD$	*/
2
3// -*- C++ -*-
4/* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005
5   Free Software Foundation, Inc.
6     Written by James Clark (jjc@jclark.com)
7
8This file is part of groff.
9
10groff is free software; you can redistribute it and/or modify it under
11the terms of the GNU General Public License as published by the Free
12Software Foundation; either version 2, or (at your option) any later
13version.
14
15groff is distributed in the hope that it will be useful, but WITHOUT ANY
16WARRANTY; without even the implied warranty of MERCHANTABILITY or
17FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18for more details.
19
20You should have received a copy of the GNU General Public License along
21with groff; see the file COPYING.  If not, write to the Free Software
22Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23
#include "table.h"

#include <limits.h>
25
26#define MAX_POINT_SIZE 99
27#define MAX_VERTICAL_SPACING 72
28
29extern "C" const char *Version_string;
30
31int compatible_flag = 0;
32
33class table_input {
34  FILE *fp;
35  enum { START, MIDDLE,
36	 REREAD_T, REREAD_TE, REREAD_E,
37	 LEADER_1, LEADER_2, LEADER_3, LEADER_4,
38	 END, ERROR } state;
39  string unget_stack;
40public:
41  table_input(FILE *);
42  int get();
43  int ended() { return unget_stack.empty() && state == END; }
44  void unget(char);
45};
46
47table_input::table_input(FILE *p)
48: fp(p), state(START)
49{
50}
51
52void table_input::unget(char c)
53{
54  assert(c != '\0');
55  unget_stack += c;
56  if (c == '\n')
57    current_lineno--;
58}
59
60int table_input::get()
61{
62  int len = unget_stack.length();
63  if (len != 0) {
64    unsigned char c = unget_stack[len - 1];
65    unget_stack.set_length(len - 1);
66    if (c == '\n')
67      current_lineno++;
68    return c;
69  }
70  int c;
71  for (;;) {
72    switch (state) {
73    case START:
74      if ((c = getc(fp)) == '.') {
75	if ((c = getc(fp)) == 'T') {
76	  if ((c = getc(fp)) == 'E') {
77	    if (compatible_flag) {
78	      state = END;
79	      return EOF;
80	    }
81	    else {
82	      c = getc(fp);
83	      if (c != EOF)
84		ungetc(c, fp);
85	      if (c == EOF || c == ' ' || c == '\n') {
86		state = END;
87		return EOF;
88	      }
89	      state = REREAD_TE;
90	      return '.';
91	    }
92	  }
93	  else {
94	    if (c != EOF)
95	      ungetc(c, fp);
96	    state = REREAD_T;
97	    return '.';
98	  }
99	}
100	else {
101	  if (c != EOF)
102	    ungetc(c, fp);
103	  state = MIDDLE;
104	  return '.';
105	}
106      }
107      else if (c == EOF) {
108	state = ERROR;
109	return EOF;
110      }
111      else {
112	if (c == '\n')
113	  current_lineno++;
114	else {
115	  state = MIDDLE;
116	  if (c == '\0') {
117	    error("invalid input character code 0");
118	    break;
119	  }
120	}
121	return c;
122      }
123      break;
124    case MIDDLE:
125      // handle line continuation and uninterpreted leader character
126      if ((c = getc(fp)) == '\\') {
127	c = getc(fp);
128	if (c == '\n')
129	  c = getc(fp);		// perhaps state ought to be START now
130	else if (c == 'a' && compatible_flag) {
131	  state = LEADER_1;
132	  return '\\';
133	}
134	else {
135	  if (c != EOF)
136	    ungetc(c, fp);
137	  c = '\\';
138	}
139      }
140      if (c == EOF) {
141	state = ERROR;
142	return EOF;
143      }
144      else {
145	if (c == '\n') {
146	  state = START;
147	  current_lineno++;
148	}
149	else if (c == '\0') {
150	  error("invalid input character code 0");
151	  break;
152	}
153	return c;
154      }
155    case REREAD_T:
156      state = MIDDLE;
157      return 'T';
158    case REREAD_TE:
159      state = REREAD_E;
160      return 'T';
161    case REREAD_E:
162      state = MIDDLE;
163      return 'E';
164    case LEADER_1:
165      state = LEADER_2;
166      return '*';
167    case LEADER_2:
168      state = LEADER_3;
169      return '(';
170    case LEADER_3:
171      state = LEADER_4;
172      return PREFIX_CHAR;
173    case LEADER_4:
174      state = MIDDLE;
175      return LEADER_CHAR;
176    case END:
177    case ERROR:
178      return EOF;
179    }
180  }
181}
182
183void process_input_file(FILE *);
184void process_table(table_input &in);
185
186void process_input_file(FILE *fp)
187{
188  enum { START, MIDDLE, HAD_DOT, HAD_T, HAD_TS, HAD_l, HAD_lf } state;
189  state = START;
190  int c;
191  while ((c = getc(fp)) != EOF)
192    switch (state) {
193    case START:
194      if (c == '.')
195	state = HAD_DOT;
196      else {
197	if (c == '\n')
198	  current_lineno++;
199	else
200	  state = MIDDLE;
201	putchar(c);
202      }
203      break;
204    case MIDDLE:
205      if (c == '\n') {
206	current_lineno++;
207	state = START;
208      }
209      putchar(c);
210      break;
211    case HAD_DOT:
212      if (c == 'T')
213	state = HAD_T;
214      else if (c == 'l')
215	state = HAD_l;
216      else {
217	putchar('.');
218	putchar(c);
219	if (c == '\n') {
220	  current_lineno++;
221	  state = START;
222	}
223	else
224	  state = MIDDLE;
225      }
226      break;
227    case HAD_T:
228      if (c == 'S')
229	state = HAD_TS;
230      else {
231	putchar('.');
232	putchar('T');
233	putchar(c);
234	if (c == '\n') {
235 	  current_lineno++;
236	  state = START;
237	}
238	else
239	  state = MIDDLE;
240      }
241      break;
242    case HAD_TS:
243      if (c == ' ' || c == '\n' || compatible_flag) {
244	putchar('.');
245	putchar('T');
246	putchar('S');
247	while (c != '\n') {
248	  if (c == EOF) {
249	    error("end of file at beginning of table");
250	    return;
251	  }
252	  putchar(c);
253	  c = getc(fp);
254	}
255	putchar('\n');
256	current_lineno++;
257	{
258	  table_input input(fp);
259	  process_table(input);
260	  set_troff_location(current_filename, current_lineno);
261	  if (input.ended()) {
262	    fputs(".TE", stdout);
263	    while ((c = getc(fp)) != '\n') {
264	      if (c == EOF) {
265		putchar('\n');
266		return;
267	      }
268	      putchar(c);
269	    }
270	    putchar('\n');
271	    current_lineno++;
272	  }
273	}
274	state = START;
275      }
276      else {
277	fputs(".TS", stdout);
278	putchar(c);
279	state = MIDDLE;
280      }
281      break;
282    case HAD_l:
283      if (c == 'f')
284	state = HAD_lf;
285      else {
286	putchar('.');
287	putchar('l');
288	putchar(c);
289	if (c == '\n') {
290 	  current_lineno++;
291	  state = START;
292	}
293	else
294	  state = MIDDLE;
295      }
296      break;
297    case HAD_lf:
298      if (c == ' ' || c == '\n' || compatible_flag) {
299	string line;
300	while (c != EOF) {
301	  line += c;
302	  if (c == '\n') {
303	    current_lineno++;
304	    break;
305	  }
306	  c = getc(fp);
307	}
308	line += '\0';
309	interpret_lf_args(line.contents());
310	printf(".lf%s", line.contents());
311	state = START;
312      }
313      else {
314	fputs(".lf", stdout);
315	putchar(c);
316	state = MIDDLE;
317      }
318      break;
319    default:
320      assert(0);
321    }
322  switch(state) {
323  case START:
324    break;
325  case MIDDLE:
326    putchar('\n');
327    break;
328  case HAD_DOT:
329    fputs(".\n", stdout);
330    break;
331  case HAD_l:
332    fputs(".l\n", stdout);
333    break;
334  case HAD_T:
335    fputs(".T\n", stdout);
336    break;
337  case HAD_lf:
338    fputs(".lf\n", stdout);
339    break;
340  case HAD_TS:
341    fputs(".TS\n", stdout);
342    break;
343  }
344  if (fp != stdin)
345    fclose(fp);
346}
347
// Global options of one table, as filled in by process_options().
struct options {
  unsigned flags;		// bitwise OR of table:: option flags
  int linesize;			// argument of `linesize'; 0 when not given
  char delim[2];		// argument of `delim'; delim[0] is '\0' when not given
  char tab_char;		// character separating data entries
  char decimal_point_char;	// argument of `decimalpoint'

  options();
};
357
358options::options()
359: flags(0), linesize(0), tab_char('\t'), decimal_point_char('.')
360{
361  delim[0] = delim[1] = '\0';
362}
363
364// Return non-zero if p and q are the same ignoring case.
365
366int strieq(const char *p, const char *q)
367{
368  for (; cmlower(*p) == cmlower(*q); p++, q++)
369    if (*p == '\0')
370      return 1;
371  return 0;
372}
373
374// return 0 if we should give up in this table
375
376options *process_options(table_input &in)
377{
378  options *opt = new options;
379  string line;
380  int level = 0;
381  for (;;) {
382    int c = in.get();
383    if (c == EOF) {
384      int i = line.length();
385      while (--i >= 0)
386	in.unget(line[i]);
387      return opt;
388    }
389    if (c == '\n') {
390      in.unget(c);
391      int i = line.length();
392      while (--i >= 0)
393	in.unget(line[i]);
394      return opt;
395    }
396    else if (c == '(')
397      level++;
398    else if (c == ')')
399      level--;
400    else if (c == ';' && level == 0) {
401      line += '\0';
402      break;
403    }
404    line += c;
405  }
406  if (line.empty())
407    return opt;
408  char *p = &line[0];
409  for (;;) {
410    while (!csalpha(*p) && *p != '\0')
411      p++;
412    if (*p == '\0')
413      break;
414    char *q = p;
415    while (csalpha(*q))
416      q++;
417    char *arg = 0;
418    if (*q != '(' && *q != '\0')
419      *q++ = '\0';
420    while (csspace(*q))
421      q++;
422    if (*q == '(') {
423      *q++ = '\0';
424      arg = q;
425      while (*q != ')' && *q != '\0')
426	q++;
427      if (*q == '\0')
428	error("missing `)'");
429      else
430	*q++ = '\0';
431    }
432    if (*p == '\0') {
433      if (arg)
434	error("argument without option");
435    }
436    else if (strieq(p, "tab")) {
437      if (!arg)
438	error("`tab' option requires argument in parentheses");
439      else {
440	if (arg[0] == '\0' || arg[1] != '\0')
441	  error("argument to `tab' option must be a single character");
442	else
443	  opt->tab_char = arg[0];
444      }
445    }
446    else if (strieq(p, "linesize")) {
447      if (!arg)
448	error("`linesize' option requires argument in parentheses");
449      else {
450	if (sscanf(arg, "%d", &opt->linesize) != 1)
451	  error("bad linesize `%s'", arg);
452	else if (opt->linesize <= 0) {
453	  error("linesize must be positive");
454	  opt->linesize = 0;
455	}
456      }
457    }
458    else if (strieq(p, "delim")) {
459      if (!arg)
460	error("`delim' option requires argument in parentheses");
461      else if (arg[0] == '\0' || arg[1] == '\0' || arg[2] != '\0')
462	error("argument to `delim' option must be two characters");
463      else {
464	opt->delim[0] = arg[0];
465	opt->delim[1] = arg[1];
466      }
467    }
468    else if (strieq(p, "center") || strieq(p, "centre")) {
469      if (arg)
470	error("`center' option does not take an argument");
471      opt->flags |= table::CENTER;
472    }
473    else if (strieq(p, "expand")) {
474      if (arg)
475	error("`expand' option does not take an argument");
476      opt->flags |= table::EXPAND;
477    }
478    else if (strieq(p, "box") || strieq(p, "frame")) {
479      if (arg)
480	error("`box' option does not take an argument");
481      opt->flags |= table::BOX;
482    }
483    else if (strieq(p, "doublebox") || strieq(p, "doubleframe")) {
484      if (arg)
485	error("`doublebox' option does not take an argument");
486      opt->flags |= table::DOUBLEBOX;
487    }
488    else if (strieq(p, "allbox")) {
489      if (arg)
490	error("`allbox' option does not take an argument");
491      opt->flags |= table::ALLBOX;
492    }
493    else if (strieq(p, "nokeep")) {
494      if (arg)
495	error("`nokeep' option does not take an argument");
496      opt->flags |= table::NOKEEP;
497    }
498    else if (strieq(p, "nospaces")) {
499      if (arg)
500	error("`nospaces' option does not take an argument");
501      opt->flags |= table::NOSPACES;
502    }
503    else if (strieq(p, "decimalpoint")) {
504      if (!arg)
505	error("`decimalpoint' option requires argument in parentheses");
506      else {
507	if (arg[0] == '\0' || arg[1] != '\0')
508	  error("argument to `decimalpoint' option must be a single character");
509	else
510	  opt->decimal_point_char = arg[0];
511      }
512    }
513    else {
514      error("unrecognised global option `%1'", p);
515      // delete opt;
516      // return 0;
517    }
518    p = q;
519  }
520  return opt;
521}
522
523entry_modifier::entry_modifier()
524: vertical_alignment(CENTER), zero_width(0), stagger(0)
525{
526  vertical_spacing.inc = vertical_spacing.val = 0;
527  point_size.inc = point_size.val = 0;
528}
529
530entry_modifier::~entry_modifier()
531{
532}
533
534entry_format::entry_format() : type(FORMAT_LEFT)
535{
536}
537
538entry_format::entry_format(format_type t) : type(t)
539{
540}
541
542void entry_format::debug_print() const
543{
544  switch (type) {
545  case FORMAT_LEFT:
546    putc('l', stderr);
547    break;
548  case FORMAT_CENTER:
549    putc('c', stderr);
550    break;
551  case FORMAT_RIGHT:
552    putc('r', stderr);
553    break;
554  case FORMAT_NUMERIC:
555    putc('n', stderr);
556    break;
557  case FORMAT_ALPHABETIC:
558    putc('a', stderr);
559    break;
560  case FORMAT_SPAN:
561    putc('s', stderr);
562    break;
563  case FORMAT_VSPAN:
564    putc('^', stderr);
565    break;
566  case FORMAT_HLINE:
567    putc('_', stderr);
568    break;
569  case FORMAT_DOUBLE_HLINE:
570    putc('=', stderr);
571    break;
572  default:
573    assert(0);
574    break;
575  }
576  if (point_size.val != 0) {
577    putc('p', stderr);
578    if (point_size.inc > 0)
579      putc('+', stderr);
580    else if (point_size.inc < 0)
581      putc('-', stderr);
582    fprintf(stderr, "%d ", point_size.val);
583  }
584  if (vertical_spacing.val != 0) {
585    putc('v', stderr);
586    if (vertical_spacing.inc > 0)
587      putc('+', stderr);
588    else if (vertical_spacing.inc < 0)
589      putc('-', stderr);
590    fprintf(stderr, "%d ", vertical_spacing.val);
591  }
592  if (!font.empty()) {
593    putc('f', stderr);
594    put_string(font, stderr);
595    putc(' ', stderr);
596  }
597  if (!macro.empty()) {
598    putc('m', stderr);
599    put_string(macro, stderr);
600    putc(' ', stderr);
601  }
602  switch (vertical_alignment) {
603  case entry_modifier::CENTER:
604    break;
605  case entry_modifier::TOP:
606    putc('t', stderr);
607    break;
608  case entry_modifier::BOTTOM:
609    putc('d', stderr);
610    break;
611  }
612  if (zero_width)
613    putc('z', stderr);
614  if (stagger)
615    putc('u', stderr);
616}
617
618struct format {
619  int nrows;
620  int ncolumns;
621  int *separation;
622  string *width;
623  char *equal;
624  entry_format **entry;
625  char **vline;
626
627  format(int nr, int nc);
628  ~format();
629  void add_rows(int n);
630};
631
632format::format(int nr, int nc) : nrows(nr), ncolumns(nc)
633{
634  int i;
635  separation = ncolumns > 1 ? new int[ncolumns - 1] : 0;
636  for (i = 0; i < ncolumns-1; i++)
637    separation[i] = -1;
638  width = new string[ncolumns];
639  equal = new char[ncolumns];
640  for (i = 0; i < ncolumns; i++)
641    equal[i] = 0;
642  entry = new entry_format *[nrows];
643  for (i = 0; i < nrows; i++)
644    entry[i] = new entry_format[ncolumns];
645  vline = new char*[nrows];
646  for (i = 0; i < nrows; i++) {
647    vline[i] = new char[ncolumns+1];
648    for (int j = 0; j < ncolumns+1; j++)
649      vline[i][j] = 0;
650  }
651}
652
653void format::add_rows(int n)
654{
655  int i;
656  char **old_vline = vline;
657  vline = new char*[nrows + n];
658  for (i = 0; i < nrows; i++)
659    vline[i] = old_vline[i];
660  a_delete old_vline;
661  for (i = 0; i < n; i++) {
662    vline[nrows + i] = new char[ncolumns + 1];
663    for (int j = 0; j < ncolumns + 1; j++)
664      vline[nrows + i][j] = 0;
665  }
666  entry_format **old_entry = entry;
667  entry = new entry_format *[nrows + n];
668  for (i = 0; i < nrows; i++)
669    entry[i] = old_entry[i];
670  a_delete old_entry;
671  for (i = 0; i < n; i++)
672    entry[nrows + i] = new entry_format[ncolumns];
673  nrows += n;
674}
675
676format::~format()
677{
678  a_delete separation;
679  ad_delete(ncolumns) width;
680  a_delete equal;
681  for (int i = 0; i < nrows; i++) {
682    a_delete vline[i];
683    ad_delete(ncolumns) entry[i];
684  }
685  a_delete vline;
686  a_delete entry;
687}
688
689struct input_entry_format : public entry_format {
690  input_entry_format *next;
691  string width;
692  int separation;
693  int vline;
694  int pre_vline;
695  int last_column;
696  int equal;
697  input_entry_format(format_type, input_entry_format * = 0);
698  ~input_entry_format();
699  void debug_print();
700};
701
702input_entry_format::input_entry_format(format_type t, input_entry_format *p)
703: entry_format(t), next(p)
704{
705  separation = -1;
706  last_column = 0;
707  vline = 0;
708  pre_vline = 0;
709  equal = 0;
710}
711
712input_entry_format::~input_entry_format()
713{
714}
715
716void free_input_entry_format_list(input_entry_format *list)
717{
718  while (list) {
719    input_entry_format *tem = list;
720    list = list->next;
721    delete tem;
722  }
723}
724
725void input_entry_format::debug_print()
726{
727  int i;
728  for (i = 0; i < pre_vline; i++)
729    putc('|', stderr);
730  entry_format::debug_print();
731  if (!width.empty()) {
732    putc('w', stderr);
733    putc('(', stderr);
734    put_string(width, stderr);
735    putc(')', stderr);
736  }
737  if (equal)
738    putc('e', stderr);
739  if (separation >= 0)
740    fprintf(stderr, "%d", separation);
741  for (i = 0; i < vline; i++)
742    putc('|', stderr);
743  if (last_column)
744    putc(',', stderr);
745}
746
747// Return zero if we should give up on this table.
748// If this is a continuation format line, current_format will be the current
749// format line.
750
751format *process_format(table_input &in, options *opt,
752		       format *current_format = 0)
753{
754  input_entry_format *list = 0;
755  int c = in.get();
756  for (;;) {
757    int pre_vline = 0;
758    int got_format = 0;
759    int got_period = 0;
760    format_type t = FORMAT_LEFT;
761    for (;;) {
762      if (c == EOF) {
763	error("end of input while processing format");
764	free_input_entry_format_list(list);
765	return 0;
766      }
767      switch (c) {
768      case 'n':
769      case 'N':
770	t = FORMAT_NUMERIC;
771	got_format = 1;
772	break;
773      case 'a':
774      case 'A':
775	got_format = 1;
776	t = FORMAT_ALPHABETIC;
777	break;
778      case 'c':
779      case 'C':
780	got_format = 1;
781	t = FORMAT_CENTER;
782	break;
783      case 'l':
784      case 'L':
785	got_format = 1;
786	t = FORMAT_LEFT;
787	break;
788      case 'r':
789      case 'R':
790	got_format = 1;
791	t = FORMAT_RIGHT;
792	break;
793      case 's':
794      case 'S':
795	got_format = 1;
796	t = FORMAT_SPAN;
797	break;
798      case '^':
799	got_format = 1;
800	t = FORMAT_VSPAN;
801	break;
802      case '_':
803      case '-':			// tbl also accepts this
804	got_format = 1;
805	t = FORMAT_HLINE;
806	break;
807      case '=':
808	got_format = 1;
809	t = FORMAT_DOUBLE_HLINE;
810	break;
811      case '.':
812	got_period = 1;
813	break;
814      case '|':
815	pre_vline++;
816	break;
817      case ' ':
818      case '\t':
819      case '\n':
820	break;
821      default:
822	if (c == opt->tab_char)
823	  break;
824	error("unrecognised format `%1'", char(c));
825	free_input_entry_format_list(list);
826	return 0;
827      }
828      if (got_period)
829	break;
830      c = in.get();
831      if (got_format)
832	break;
833    }
834    if (got_period)
835      break;
836    list = new input_entry_format(t, list);
837    if (pre_vline)
838      list->pre_vline = pre_vline;
839    int success = 1;
840    do {
841      switch (c) {
842      case 't':
843      case 'T':
844	c = in.get();
845	list->vertical_alignment = entry_modifier::TOP;
846	break;
847      case 'd':
848      case 'D':
849	c = in.get();
850	list->vertical_alignment = entry_modifier::BOTTOM;
851	break;
852      case 'u':
853      case 'U':
854	c = in.get();
855	list->stagger = 1;
856	break;
857      case 'z':
858      case 'Z':
859	c = in.get();
860	list->zero_width = 1;
861	break;
862      case '0':
863      case '1':
864      case '2':
865      case '3':
866      case '4':
867      case '5':
868      case '6':
869      case '7':
870      case '8':
871      case '9':
872	{
873	  int w = 0;
874	  do {
875	    w = w*10 + (c - '0');
876	    c = in.get();
877	  } while (c != EOF && csdigit(c));
878	  list->separation = w;
879	}
880	break;
881      case 'f':
882      case 'F':
883	do {
884	  c = in.get();
885	} while (c == ' ' || c == '\t');
886	if (c == EOF) {
887	  error("missing font name");
888	  break;
889	}
890	if (c == '(') {
891	  for (;;) {
892	    c = in.get();
893	    if (c == EOF || c == ' ' || c == '\t') {
894	      error("missing `)'");
895	      break;
896	    }
897	    if (c == ')') {
898	      c = in.get();
899	      break;
900	    }
901	    list->font += char(c);
902	  }
903	}
904	else {
905	  list->font = c;
906	  char cc = c;
907	  c = in.get();
908	  if (!csdigit(cc)
909	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
910	    list->font += char(c);
911	    c = in.get();
912	  }
913	}
914	break;
915      case 'x':
916      case 'X':
917	do {
918	  c = in.get();
919	} while (c == ' ' || c == '\t');
920	if (c == EOF) {
921	  error("missing macro name");
922	  break;
923	}
924	if (c == '(') {
925	  for (;;) {
926	    c = in.get();
927	    if (c == EOF || c == ' ' || c == '\t') {
928	      error("missing `)'");
929	      break;
930	    }
931	    if (c == ')') {
932	      c = in.get();
933	      break;
934	    }
935	    list->macro += char(c);
936	  }
937	}
938	else {
939	  list->macro = c;
940	  char cc = c;
941	  c = in.get();
942	  if (!csdigit(cc)
943	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
944	    list->macro += char(c);
945	    c = in.get();
946	  }
947	}
948	break;
949      case 'v':
950      case 'V':
951	c = in.get();
952	list->vertical_spacing.val = 0;
953	list->vertical_spacing.inc = 0;
954	if (c == '+' || c == '-') {
955	  list->vertical_spacing.inc = (c == '+' ? 1 : -1);
956	  c = in.get();
957	}
958	if (c == EOF || !csdigit(c)) {
959	  error("`v' modifier must be followed by number");
960	  list->vertical_spacing.inc = 0;
961	}
962	else {
963	  do {
964	    list->vertical_spacing.val *= 10;
965	    list->vertical_spacing.val += c - '0';
966	    c = in.get();
967	  } while (c != EOF && csdigit(c));
968	}
969	if (list->vertical_spacing.val > MAX_VERTICAL_SPACING
970	    || list->vertical_spacing.val < -MAX_VERTICAL_SPACING) {
971	  error("unreasonable vertical spacing");
972	  list->vertical_spacing.val = 0;
973	  list->vertical_spacing.inc = 0;
974	}
975	break;
976      case 'p':
977      case 'P':
978	c = in.get();
979	list->point_size.val = 0;
980	list->point_size.inc = 0;
981	if (c == '+' || c == '-') {
982	  list->point_size.inc = (c == '+' ? 1 : -1);
983	  c = in.get();
984	}
985	if (c == EOF || !csdigit(c)) {
986	  error("`p' modifier must be followed by number");
987	  list->point_size.inc = 0;
988	}
989	else {
990	  do {
991	    list->point_size.val *= 10;
992	    list->point_size.val += c - '0';
993	    c = in.get();
994	  } while (c != EOF && csdigit(c));
995	}
996	if (list->point_size.val > MAX_POINT_SIZE
997	    || list->point_size.val < -MAX_POINT_SIZE) {
998	  error("unreasonable point size");
999	  list->point_size.val = 0;
1000	  list->point_size.inc = 0;
1001	}
1002	break;
1003      case 'w':
1004      case 'W':
1005	c = in.get();
1006	while (c == ' ' || c == '\t')
1007	  c = in.get();
1008	if (c == '(') {
1009	  list->width = "";
1010	  c = in.get();
1011	  while (c != ')') {
1012	    if (c == EOF || c == '\n') {
1013	      error("missing `)'");
1014	      free_input_entry_format_list(list);
1015	      return 0;
1016	    }
1017	    list->width += c;
1018	    c = in.get();
1019	  }
1020	  c = in.get();
1021	}
1022	else {
1023	  if (c == '+' || c == '-') {
1024	    list->width = char(c);
1025	    c = in.get();
1026	  }
1027	  else
1028	    list->width = "";
1029	  if (c == EOF || !csdigit(c))
1030	    error("bad argument for `w' modifier");
1031	  else {
1032	    do {
1033	      list->width += char(c);
1034	      c = in.get();
1035	    } while (c != EOF && csdigit(c));
1036	  }
1037	}
1038	break;
1039      case 'e':
1040      case 'E':
1041	c = in.get();
1042	list->equal++;
1043	break;
1044      case '|':
1045	c = in.get();
1046	list->vline++;
1047	break;
1048      case 'B':
1049      case 'b':
1050	c = in.get();
1051	list->font = "B";
1052	break;
1053      case 'I':
1054      case 'i':
1055	c = in.get();
1056	list->font = "I";
1057	break;
1058      case ' ':
1059      case '\t':
1060	c = in.get();
1061	break;
1062      default:
1063	if (c == opt->tab_char)
1064	  c = in.get();
1065	else
1066	  success = 0;
1067	break;
1068      }
1069    } while (success);
1070    if (list->vline > 2) {
1071      list->vline = 2;
1072      error("more than 2 vertical bars between key letters");
1073    }
1074    if (c == '\n' || c == ',') {
1075      c = in.get();
1076      list->last_column = 1;
1077    }
1078  }
1079  if (c == '.') {
1080    do {
1081      c = in.get();
1082    } while (c == ' ' || c == '\t');
1083    if (c != '\n') {
1084      error("`.' not last character on line");
1085      free_input_entry_format_list(list);
1086      return 0;
1087    }
1088  }
1089  if (!list) {
1090    error("no format");
1091    free_input_entry_format_list(list);
1092    return 0;
1093  }
1094  list->last_column = 1;
1095  // now reverse the list so that the first row is at the beginning
1096  input_entry_format *rev = 0;
1097  while (list != 0) {
1098    input_entry_format *tem = list->next;
1099    list->next = rev;
1100    rev = list;
1101    list = tem;
1102  }
1103  list = rev;
1104  input_entry_format *tem;
1105
1106#if 0
1107  for (tem = list; tem; tem = tem->next)
1108    tem->debug_print();
1109  putc('\n', stderr);
1110#endif
1111  // compute number of columns and rows
1112  int ncolumns = 0;
1113  int nrows = 0;
1114  int col = 0;
1115  for (tem = list; tem; tem = tem->next) {
1116    if (tem->last_column) {
1117      if (col >= ncolumns)
1118	ncolumns = col + 1;
1119      col = 0;
1120      nrows++;
1121    }
1122    else
1123      col++;
1124  }
1125  int row;
1126  format *f;
1127  if (current_format) {
1128    if (ncolumns > current_format->ncolumns) {
1129      error("cannot increase the number of columns in a continued format");
1130      free_input_entry_format_list(list);
1131      return 0;
1132    }
1133    f = current_format;
1134    row = f->nrows;
1135    f->add_rows(nrows);
1136  }
1137  else {
1138    f = new format(nrows, ncolumns);
1139    row = 0;
1140  }
1141  col = 0;
1142  for (tem = list; tem; tem = tem->next) {
1143    f->entry[row][col] = *tem;
1144    if (col < ncolumns-1) {
1145      // use the greatest separation
1146      if (tem->separation > f->separation[col]) {
1147	if (current_format)
1148	  error("cannot change column separation in continued format");
1149	else
1150	  f->separation[col] = tem->separation;
1151      }
1152    }
1153    else if (tem->separation >= 0)
1154      error("column separation specified for last column");
1155    if (tem->equal && !f->equal[col]) {
1156      if (current_format)
1157	error("cannot change which columns are equal in continued format");
1158      else
1159	f->equal[col] = 1;
1160    }
1161    if (!tem->width.empty()) {
1162      // use the last width
1163      if (!f->width[col].empty() && f->width[col] != tem->width)
1164	error("multiple widths for column %1", col+1);
1165      f->width[col] = tem->width;
1166    }
1167    if (tem->pre_vline) {
1168      assert(col == 0);
1169      f->vline[row][col] = tem->pre_vline;
1170    }
1171    f->vline[row][col+1] = tem->vline;
1172    if (tem->last_column) {
1173      row++;
1174      col = 0;
1175    }
1176    else
1177      col++;
1178  }
1179  free_input_entry_format_list(list);
1180  for (col = 0; col < ncolumns; col++) {
1181    entry_format *e = f->entry[f->nrows-1] + col;
1182    if (e->type != FORMAT_HLINE
1183	&& e->type != FORMAT_DOUBLE_HLINE
1184	&& e->type != FORMAT_SPAN)
1185      break;
1186  }
1187  if (col >= ncolumns) {
1188    error("last row of format is all lines");
1189    delete f;
1190    return 0;
1191  }
1192  return f;
1193}
1194
1195table *process_data(table_input &in, format *f, options *opt)
1196{
1197  char tab_char = opt->tab_char;
1198  int ncolumns = f->ncolumns;
1199  int current_row = 0;
1200  int format_index = 0;
1201  int give_up = 0;
1202  enum { DATA_INPUT_LINE, TROFF_INPUT_LINE, SINGLE_HLINE, DOUBLE_HLINE } type;
1203  table *tbl = new table(ncolumns, opt->flags, opt->linesize,
1204			 opt->decimal_point_char);
1205  if (opt->delim[0] != '\0')
1206    tbl->set_delim(opt->delim[0], opt->delim[1]);
1207  for (;;) {
1208    // first determine what type of line this is
1209    int c = in.get();
1210    if (c == EOF)
1211      break;
1212    if (c == '.') {
1213      int d = in.get();
1214      if (d != EOF && csdigit(d)) {
1215	in.unget(d);
1216	type = DATA_INPUT_LINE;
1217      }
1218      else {
1219	in.unget(d);
1220	type = TROFF_INPUT_LINE;
1221      }
1222    }
1223    else if (c == '_' || c == '=') {
1224      int d = in.get();
1225      if (d == '\n') {
1226	if (c == '_')
1227	  type = SINGLE_HLINE;
1228	else
1229	  type = DOUBLE_HLINE;
1230      }
1231      else {
1232	in.unget(d);
1233	type = DATA_INPUT_LINE;
1234      }
1235    }
1236    else {
1237      type = DATA_INPUT_LINE;
1238    }
1239    switch (type) {
1240    case DATA_INPUT_LINE:
1241      {
1242	string input_entry;
1243	if (format_index >= f->nrows)
1244	  format_index = f->nrows - 1;
1245	// A format row that is all lines doesn't use up a data line.
1246	while (format_index < f->nrows - 1) {
1247	  int cnt;
1248	  for (cnt = 0; cnt < ncolumns; cnt++) {
1249	    entry_format *e = f->entry[format_index] + cnt;
1250	    if (e->type != FORMAT_HLINE
1251		&& e->type != FORMAT_DOUBLE_HLINE
1252		// Unfortunately tbl treats a span as needing data.
1253		// && e->type != FORMAT_SPAN
1254		)
1255	      break;
1256	  }
1257	  if (cnt < ncolumns)
1258	    break;
1259	  for (cnt = 0; cnt < ncolumns; cnt++)
1260	    tbl->add_entry(current_row, cnt, input_entry,
1261			   f->entry[format_index] + cnt, current_filename,
1262			   current_lineno);
1263	  tbl->add_vlines(current_row, f->vline[format_index]);
1264	  format_index++;
1265	  current_row++;
1266	}
1267	entry_format *line_format = f->entry[format_index];
1268	int col = 0;
1269	int row_comment = 0;
1270	for (;;) {
1271	  if (c == tab_char || c == '\n') {
1272	    int ln = current_lineno;
1273	    if (c == '\n')
1274	      --ln;
1275	    if ((opt->flags & table::NOSPACES))
1276	      input_entry.remove_spaces();
1277	    while (col < ncolumns
1278		   && line_format[col].type == FORMAT_SPAN) {
1279	      tbl->add_entry(current_row, col, "", &line_format[col],
1280			     current_filename, ln);
1281	      col++;
1282	    }
1283	    if (c == '\n' && input_entry.length() == 2
1284		&& input_entry[0] == 'T' && input_entry[1] == '{') {
1285	      input_entry = "";
1286	      ln++;
1287	      enum {
1288		START, MIDDLE, GOT_T, GOT_RIGHT_BRACE, GOT_DOT,
1289		GOT_l, GOT_lf, END
1290	      } state = START;
1291	      while (state != END) {
1292		c = in.get();
1293		if (c == EOF)
1294		  break;
1295		switch (state) {
1296		case START:
1297		  if (c == 'T')
1298		    state = GOT_T;
1299		  else if (c == '.')
1300		    state = GOT_DOT;
1301		  else {
1302		    input_entry += c;
1303		    if (c != '\n')
1304		      state = MIDDLE;
1305		  }
1306		  break;
1307		case GOT_T:
1308		  if (c == '}')
1309		    state = GOT_RIGHT_BRACE;
1310		  else {
1311		    input_entry += 'T';
1312		    input_entry += c;
1313		    state = c == '\n' ? START : MIDDLE;
1314		  }
1315		  break;
1316		case GOT_DOT:
1317		  if (c == 'l')
1318		    state = GOT_l;
1319		  else {
1320		    input_entry += '.';
1321		    input_entry += c;
1322		    state = c == '\n' ? START : MIDDLE;
1323		  }
1324		  break;
1325		case GOT_l:
1326		  if (c == 'f')
1327		    state = GOT_lf;
1328		  else {
1329		    input_entry += ".l";
1330		    input_entry += c;
1331		    state = c == '\n' ? START : MIDDLE;
1332		  }
1333		  break;
1334		case GOT_lf:
1335		  if (c == ' ' || c == '\n' || compatible_flag) {
1336		    string args;
1337		    input_entry += ".lf";
1338		    while (c != EOF) {
1339		      args += c;
1340		      if (c == '\n')
1341			break;
1342		      c = in.get();
1343		    }
1344		    args += '\0';
1345		    interpret_lf_args(args.contents());
1346		    // remove the '\0'
1347		    args.set_length(args.length() - 1);
1348		    input_entry += args;
1349		    state = START;
1350		  }
1351		  else {
1352		    input_entry += ".lf";
1353		    input_entry += c;
1354		    state = MIDDLE;
1355		  }
1356		  break;
1357		case GOT_RIGHT_BRACE:
1358		  if ((opt->flags & table::NOSPACES)) {
1359		    while (c == ' ')
1360		      c = in.get();
1361		    if (c == EOF)
1362		      break;
1363		  }
1364		  if (c == '\n' || c == tab_char)
1365		    state = END;
1366		  else {
1367		    input_entry += 'T';
1368		    input_entry += '}';
1369		    input_entry += c;
1370		    state = MIDDLE;
1371		  }
1372		  break;
1373		case MIDDLE:
1374		  if (c == '\n')
1375		    state = START;
1376		  input_entry += c;
1377		  break;
1378		case END:
1379		default:
1380		  assert(0);
1381		}
1382	      }
1383	      if (c == EOF) {
1384		error("end of data in middle of text block");
1385		give_up = 1;
1386		break;
1387	      }
1388	    }
1389	    if (col >= ncolumns) {
1390	      if (!input_entry.empty()) {
1391		if (input_entry.length() >= 2
1392		    && input_entry[0] == '\\'
1393		    && input_entry[1] == '"')
1394		  row_comment = 1;
1395		else if (!row_comment) {
1396		  if (c == '\n')
1397		    in.unget(c);
1398		  input_entry += '\0';
1399		  error("excess data entry `%1' discarded",
1400			input_entry.contents());
1401		  if (c == '\n')
1402		    (void)in.get();
1403		}
1404	      }
1405	    }
1406	    else
1407	      tbl->add_entry(current_row, col, input_entry,
1408			     &line_format[col], current_filename, ln);
1409	    col++;
1410	    if (c == '\n')
1411	      break;
1412	    input_entry = "";
1413	  }
1414	  else
1415	    input_entry += c;
1416	  c = in.get();
1417	  if (c == EOF)
1418	    break;
1419	}
1420	if (give_up)
1421	  break;
1422	input_entry = "";
1423	for (; col < ncolumns; col++)
1424	  tbl->add_entry(current_row, col, input_entry, &line_format[col],
1425			 current_filename, current_lineno - 1);
1426	tbl->add_vlines(current_row, f->vline[format_index]);
1427	current_row++;
1428	format_index++;
1429      }
1430      break;
1431    case TROFF_INPUT_LINE:
1432      {
1433	string line;
1434	int ln = current_lineno;
1435	for (;;) {
1436	  line += c;
1437	  if (c == '\n')
1438	    break;
1439	  c = in.get();
1440	  if (c == EOF) {
1441	    break;
1442	  }
1443	}
1444	tbl->add_text_line(current_row, line, current_filename, ln);
1445	if (line.length() >= 4
1446	    && line[0] == '.' && line[1] == 'T' && line[2] == '&') {
1447	  format *newf = process_format(in, opt, f);
1448	  if (newf == 0)
1449	    give_up = 1;
1450	  else
1451	    f = newf;
1452	}
1453	if (line.length() >= 3
1454	    && line[0] == '.' && line[1] == 'l' && line[2] == 'f') {
1455	  line += '\0';
1456	  interpret_lf_args(line.contents() + 3);
1457	}
1458      }
1459      break;
1460    case SINGLE_HLINE:
1461      tbl->add_single_hline(current_row);
1462      break;
1463    case DOUBLE_HLINE:
1464      tbl->add_double_hline(current_row);
1465      break;
1466    default:
1467      assert(0);
1468    }
1469    if (give_up)
1470      break;
1471  }
1472  if (!give_up && current_row == 0) {
1473    error("no real data");
1474    give_up = 1;
1475  }
1476  if (give_up) {
1477    delete tbl;
1478    return 0;
1479  }
1480  // Do this here rather than at the beginning in case continued formats
1481  // change it.
1482  int i;
1483  for (i = 0; i < ncolumns - 1; i++)
1484    if (f->separation[i] >= 0)
1485      tbl->set_column_separation(i, f->separation[i]);
1486  for (i = 0; i < ncolumns; i++)
1487    if (!f->width[i].empty())
1488      tbl->set_minimum_width(i, f->width[i]);
1489  for (i = 0; i < ncolumns; i++)
1490    if (f->equal[i])
1491      tbl->set_equal_column(i);
1492  return tbl;
1493}
1494
1495void process_table(table_input &in)
1496{
1497  options *opt = 0;
1498  format *form = 0;
1499  table *tbl = 0;
1500  if ((opt = process_options(in)) != 0
1501      && (form = process_format(in, opt)) != 0
1502      && (tbl = process_data(in, form, opt)) != 0) {
1503    tbl->print();
1504    delete tbl;
1505  }
1506  else {
1507    error("giving up on this table");
1508    while (in.get() != EOF)
1509      ;
1510  }
1511  delete opt;
1512  delete form;
1513  if (!in.ended())
1514    error("premature end of file");
1515}
1516
1517static void usage(FILE *stream)
1518{
1519  fprintf(stream, "usage: %s [ -vC ] [ files... ]\n", program_name);
1520}
1521
1522int main(int argc, char **argv)
1523{
1524  program_name = argv[0];
1525  static char stderr_buf[BUFSIZ];
1526  setbuf(stderr, stderr_buf);
1527  int opt;
1528  static const struct option long_options[] = {
1529    { "help", no_argument, 0, CHAR_MAX + 1 },
1530    { "version", no_argument, 0, 'v' },
1531    { NULL, 0, 0, 0 }
1532  };
1533  while ((opt = getopt_long(argc, argv, "vCT:", long_options, NULL)) != EOF)
1534    switch (opt) {
1535    case 'C':
1536      compatible_flag = 1;
1537      break;
1538    case 'v':
1539      {
1540	printf("GNU tbl (groff) version %s\n", Version_string);
1541	exit(0);
1542	break;
1543      }
1544    case 'T':
1545      // I'm sick of getting bug reports from IRIX users
1546      break;
1547    case CHAR_MAX + 1: // --help
1548      usage(stdout);
1549      exit(0);
1550      break;
1551    case '?':
1552      usage(stderr);
1553      exit(1);
1554      break;
1555    default:
1556      assert(0);
1557    }
1558  printf(".if !\\n(.g .ab GNU tbl requires GNU troff.\n"
1559	 ".if !dTS .ds TS\n"
1560	 ".if !dTE .ds TE\n");
1561  if (argc > optind) {
1562    for (int i = optind; i < argc; i++)
1563      if (argv[i][0] == '-' && argv[i][1] == '\0') {
1564	current_filename = "-";
1565	current_lineno = 1;
1566	printf(".lf 1 -\n");
1567	process_input_file(stdin);
1568      }
1569      else {
1570	errno = 0;
1571	FILE *fp = fopen(argv[i], "r");
1572	if (fp == 0)
1573	  fatal("can't open `%1': %2", argv[i], strerror(errno));
1574	else {
1575	  current_lineno = 1;
1576	  current_filename = argv[i];
1577	  printf(".lf 1 %s\n", current_filename);
1578	  process_input_file(fp);
1579	}
1580      }
1581  }
1582  else {
1583    current_filename = "-";
1584    current_lineno = 1;
1585    printf(".lf 1 -\n");
1586    process_input_file(stdin);
1587  }
1588  if (ferror(stdout) || fflush(stdout) < 0)
1589    fatal("output error");
1590  return 0;
1591}
1592
1593