main.cpp revision 114402
1// -*- C++ -*-
2/* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003
3   Free Software Foundation, Inc.
4     Written by James Clark (jjc@jclark.com)
5
6This file is part of groff.
7
8groff is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13groff is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License along
19with groff; see the file COPYING.  If not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "table.h"
23
24#define MAX_POINT_SIZE 99
25#define MAX_VERTICAL_SPACING 72
26
27extern "C" const char *Version_string;
28
29static int compatible_flag = 0;
30
31class table_input {
32  FILE *fp;
33  enum { START, MIDDLE, REREAD_T, REREAD_TE, REREAD_E, END, ERROR } state;
34  string unget_stack;
35public:
36  table_input(FILE *);
37  int get();
38  int ended() { return unget_stack.empty() && state == END; }
39  void unget(char);
40};
41
42table_input::table_input(FILE *p)
43: fp(p), state(START)
44{
45}
46
47void table_input::unget(char c)
48{
49  assert(c != '\0');
50  unget_stack += c;
51  if (c == '\n')
52    current_lineno--;
53}
54
55int table_input::get()
56{
57  int len = unget_stack.length();
58  if (len != 0) {
59    unsigned char c = unget_stack[len - 1];
60    unget_stack.set_length(len - 1);
61    if (c == '\n')
62      current_lineno++;
63    return c;
64  }
65  int c;
66  for (;;) {
67    switch (state) {
68    case START:
69      if ((c = getc(fp)) == '.') {
70	if ((c = getc(fp)) == 'T') {
71	  if ((c = getc(fp)) == 'E') {
72	    if (compatible_flag) {
73	      state = END;
74	      return EOF;
75	    }
76	    else {
77	      c = getc(fp);
78	      if (c != EOF)
79		ungetc(c, fp);
80	      if (c == EOF || c == ' ' || c == '\n') {
81		state = END;
82		return EOF;
83	      }
84	      state = REREAD_TE;
85	      return '.';
86	    }
87	  }
88	  else {
89	    if (c != EOF)
90	      ungetc(c, fp);
91	    state = REREAD_T;
92	    return '.';
93	  }
94	}
95	else {
96	  if (c != EOF)
97	    ungetc(c, fp);
98	  state = MIDDLE;
99	  return '.';
100	}
101      }
102      else if (c == EOF) {
103	state = ERROR;
104	return EOF;
105      }
106      else {
107	if (c == '\n')
108	  current_lineno++;
109	else {
110	  state = MIDDLE;
111	  if (c == '\0') {
112	    error("invalid input character code 0");
113	    break;
114	  }
115	}
116	return c;
117      }
118      break;
119    case MIDDLE:
120      // handle line continuation
121      if ((c = getc(fp)) == '\\') {
122	c = getc(fp);
123	if (c == '\n')
124	  c = getc(fp);		// perhaps state ought to be START now
125	else {
126	  if (c != EOF)
127	    ungetc(c, fp);
128	  c = '\\';
129	}
130      }
131      if (c == EOF) {
132	state = ERROR;
133	return EOF;
134      }
135      else {
136	if (c == '\n') {
137	  state = START;
138	  current_lineno++;
139	}
140	else if (c == '\0') {
141	  error("invalid input character code 0");
142	  break;
143	}
144	return c;
145      }
146    case REREAD_T:
147      state = MIDDLE;
148      return 'T';
149    case REREAD_TE:
150      state = REREAD_E;
151      return 'T';
152    case REREAD_E:
153      state = MIDDLE;
154      return 'E';
155    case END:
156    case ERROR:
157      return EOF;
158    }
159  }
160}
161
162void process_input_file(FILE *);
163void process_table(table_input &in);
164
165void process_input_file(FILE *fp)
166{
167  enum { START, MIDDLE, HAD_DOT, HAD_T, HAD_TS, HAD_l, HAD_lf } state;
168  state = START;
169  int c;
170  while ((c = getc(fp)) != EOF)
171    switch (state) {
172    case START:
173      if (c == '.')
174	state = HAD_DOT;
175      else {
176	if (c == '\n')
177	  current_lineno++;
178	else
179	  state = MIDDLE;
180	putchar(c);
181      }
182      break;
183    case MIDDLE:
184      if (c == '\n') {
185	current_lineno++;
186	state = START;
187      }
188      putchar(c);
189      break;
190    case HAD_DOT:
191      if (c == 'T')
192	state = HAD_T;
193      else if (c == 'l')
194	state = HAD_l;
195      else {
196	putchar('.');
197	putchar(c);
198	if (c == '\n') {
199	  current_lineno++;
200	  state = START;
201	}
202	else
203	  state = MIDDLE;
204      }
205      break;
206    case HAD_T:
207      if (c == 'S')
208	state = HAD_TS;
209      else {
210	putchar('.');
211	putchar('T');
212	putchar(c);
213	if (c == '\n') {
214 	  current_lineno++;
215	  state = START;
216	}
217	else
218	  state = MIDDLE;
219      }
220      break;
221    case HAD_TS:
222      if (c == ' ' || c == '\n' || compatible_flag) {
223	putchar('.');
224	putchar('T');
225	putchar('S');
226	while (c != '\n') {
227	  if (c == EOF) {
228	    error("end of file at beginning of table");
229	    return;
230	  }
231	  putchar(c);
232	  c = getc(fp);
233	}
234	putchar('\n');
235	current_lineno++;
236	{
237	  table_input input(fp);
238	  process_table(input);
239	  set_troff_location(current_filename, current_lineno);
240	  if (input.ended()) {
241	    fputs(".TE", stdout);
242	    while ((c = getc(fp)) != '\n') {
243	      if (c == EOF) {
244		putchar('\n');
245		return;
246	      }
247	      putchar(c);
248	    }
249	    putchar('\n');
250	    current_lineno++;
251	  }
252	}
253	state = START;
254      }
255      else {
256	fputs(".TS", stdout);
257	putchar(c);
258	state = MIDDLE;
259      }
260      break;
261    case HAD_l:
262      if (c == 'f')
263	state = HAD_lf;
264      else {
265	putchar('.');
266	putchar('l');
267	putchar(c);
268	if (c == '\n') {
269 	  current_lineno++;
270	  state = START;
271	}
272	else
273	  state = MIDDLE;
274      }
275      break;
276    case HAD_lf:
277      if (c == ' ' || c == '\n' || compatible_flag) {
278	string line;
279	while (c != EOF) {
280	  line += c;
281	  if (c == '\n') {
282	    current_lineno++;
283	    break;
284	  }
285	  c = getc(fp);
286	}
287	line += '\0';
288	interpret_lf_args(line.contents());
289	printf(".lf%s", line.contents());
290	state = START;
291      }
292      else {
293	fputs(".lf", stdout);
294	putchar(c);
295	state = MIDDLE;
296      }
297      break;
298    default:
299      assert(0);
300    }
301  switch(state) {
302  case START:
303    break;
304  case MIDDLE:
305    putchar('\n');
306    break;
307  case HAD_DOT:
308    fputs(".\n", stdout);
309    break;
310  case HAD_l:
311    fputs(".l\n", stdout);
312    break;
313  case HAD_T:
314    fputs(".T\n", stdout);
315    break;
316  case HAD_lf:
317    fputs(".lf\n", stdout);
318    break;
319  case HAD_TS:
320    fputs(".TS\n", stdout);
321    break;
322  }
323  if (fp != stdin)
324    fclose(fp);
325}
326
// Global options of one table, as filled in by process_options().
struct options {
  // Bitwise OR of table:: flag constants (CENTER, EXPAND, BOX, ...).
  unsigned flags;
  // Value of the `linesize' option; 0 when not given or invalid.
  int linesize;
  // The two characters of the `delim' option; delim[0] is '\0' when
  // the option was not given.
  char delim[2];
  // Character separating data entries; '\t' unless changed by `tab'.
  char tab_char;
  // Character set by the `decimalpoint' option; '.' by default.
  char decimal_point_char;

  options();
};
336
337options::options()
338: flags(0), linesize(0), tab_char('\t'), decimal_point_char('.')
339{
340  delim[0] = delim[1] = '\0';
341}
342
343// Return non-zero if p and q are the same ignoring case.
344
345int strieq(const char *p, const char *q)
346{
347  for (; cmlower(*p) == cmlower(*q); p++, q++)
348    if (*p == '\0')
349      return 1;
350  return 0;
351}
352
353// return 0 if we should give up in this table
354
355options *process_options(table_input &in)
356{
357  options *opt = new options;
358  string line;
359  int level = 0;
360  for (;;) {
361    int c = in.get();
362    if (c == EOF) {
363      int i = line.length();
364      while (--i >= 0)
365	in.unget(line[i]);
366      return opt;
367    }
368    if (c == '\n') {
369      in.unget(c);
370      int i = line.length();
371      while (--i >= 0)
372	in.unget(line[i]);
373      return opt;
374    }
375    else if (c == '(')
376      level++;
377    else if (c == ')')
378      level--;
379    else if (c == ';' && level == 0) {
380      line += '\0';
381      break;
382    }
383    line += c;
384  }
385  if (line.empty())
386    return opt;
387  char *p = &line[0];
388  for (;;) {
389    while (!csalpha(*p) && *p != '\0')
390      p++;
391    if (*p == '\0')
392      break;
393    char *q = p;
394    while (csalpha(*q))
395      q++;
396    char *arg = 0;
397    if (*q != '(' && *q != '\0')
398      *q++ = '\0';
399    while (csspace(*q))
400      q++;
401    if (*q == '(') {
402      *q++ = '\0';
403      arg = q;
404      while (*q != ')' && *q != '\0')
405	q++;
406      if (*q == '\0')
407	error("missing `)'");
408      else
409	*q++ = '\0';
410    }
411    if (*p == '\0') {
412      if (arg)
413	error("argument without option");
414    }
415    else if (strieq(p, "tab")) {
416      if (!arg)
417	error("`tab' option requires argument in parentheses");
418      else {
419	if (arg[0] == '\0' || arg[1] != '\0')
420	  error("argument to `tab' option must be a single character");
421	else
422	  opt->tab_char = arg[0];
423      }
424    }
425    else if (strieq(p, "linesize")) {
426      if (!arg)
427	error("`linesize' option requires argument in parentheses");
428      else {
429	if (sscanf(arg, "%d", &opt->linesize) != 1)
430	  error("bad linesize `%s'", arg);
431	else if (opt->linesize <= 0) {
432	  error("linesize must be positive");
433	  opt->linesize = 0;
434	}
435      }
436    }
437    else if (strieq(p, "delim")) {
438      if (!arg)
439	error("`delim' option requires argument in parentheses");
440      else if (arg[0] == '\0' || arg[1] == '\0' || arg[2] != '\0')
441	error("argument to `delim' option must be two characters");
442      else {
443	opt->delim[0] = arg[0];
444	opt->delim[1] = arg[1];
445      }
446    }
447    else if (strieq(p, "center") || strieq(p, "centre")) {
448      if (arg)
449	error("`center' option does not take an argument");
450      opt->flags |= table::CENTER;
451    }
452    else if (strieq(p, "expand")) {
453      if (arg)
454	error("`expand' option does not take an argument");
455      opt->flags |= table::EXPAND;
456    }
457    else if (strieq(p, "box") || strieq(p, "frame")) {
458      if (arg)
459	error("`box' option does not take an argument");
460      opt->flags |= table::BOX;
461    }
462    else if (strieq(p, "doublebox") || strieq(p, "doubleframe")) {
463      if (arg)
464	error("`doublebox' option does not take an argument");
465      opt->flags |= table::DOUBLEBOX;
466    }
467    else if (strieq(p, "allbox")) {
468      if (arg)
469	error("`allbox' option does not take an argument");
470      opt->flags |= table::ALLBOX;
471    }
472    else if (strieq(p, "nokeep")) {
473      if (arg)
474	error("`nokeep' option does not take an argument");
475      opt->flags |= table::NOKEEP;
476    }
477    else if (strieq(p, "nospaces")) {
478      if (arg)
479	error("`nospaces' option does not take an argument");
480      opt->flags |= table::NOSPACES;
481    }
482    else if (strieq(p, "decimalpoint")) {
483      if (!arg)
484	error("`decimalpoint' option requires argument in parentheses");
485      else {
486	if (arg[0] == '\0' || arg[1] != '\0')
487	  error("argument to `decimalpoint' option must be a single character");
488	else
489	  opt->decimal_point_char = arg[0];
490      }
491    }
492    else {
493      error("unrecognised global option `%1'", p);
494      // delete opt;
495      // return 0;
496    }
497    p = q;
498  }
499  return opt;
500}
501
502entry_modifier::entry_modifier()
503: vertical_alignment(CENTER), zero_width(0), stagger(0)
504{
505  vertical_spacing.inc = vertical_spacing.val = 0;
506  point_size.inc = point_size.val = 0;
507}
508
509entry_modifier::~entry_modifier()
510{
511}
512
513entry_format::entry_format() : type(FORMAT_LEFT)
514{
515}
516
517entry_format::entry_format(format_type t) : type(t)
518{
519}
520
521void entry_format::debug_print() const
522{
523  switch (type) {
524  case FORMAT_LEFT:
525    putc('l', stderr);
526    break;
527  case FORMAT_CENTER:
528    putc('c', stderr);
529    break;
530  case FORMAT_RIGHT:
531    putc('r', stderr);
532    break;
533  case FORMAT_NUMERIC:
534    putc('n', stderr);
535    break;
536  case FORMAT_ALPHABETIC:
537    putc('a', stderr);
538    break;
539  case FORMAT_SPAN:
540    putc('s', stderr);
541    break;
542  case FORMAT_VSPAN:
543    putc('^', stderr);
544    break;
545  case FORMAT_HLINE:
546    putc('_', stderr);
547    break;
548  case FORMAT_DOUBLE_HLINE:
549    putc('=', stderr);
550    break;
551  default:
552    assert(0);
553    break;
554  }
555  if (point_size.val != 0) {
556    putc('p', stderr);
557    if (point_size.inc > 0)
558      putc('+', stderr);
559    else if (point_size.inc < 0)
560      putc('-', stderr);
561    fprintf(stderr, "%d ", point_size.val);
562  }
563  if (vertical_spacing.val != 0) {
564    putc('v', stderr);
565    if (vertical_spacing.inc > 0)
566      putc('+', stderr);
567    else if (vertical_spacing.inc < 0)
568      putc('-', stderr);
569    fprintf(stderr, "%d ", vertical_spacing.val);
570  }
571  if (!font.empty()) {
572    putc('f', stderr);
573    put_string(font, stderr);
574    putc(' ', stderr);
575  }
576  switch (vertical_alignment) {
577  case entry_modifier::CENTER:
578    break;
579  case entry_modifier::TOP:
580    putc('t', stderr);
581    break;
582  case entry_modifier::BOTTOM:
583    putc('d', stderr);
584    break;
585  }
586  if (zero_width)
587    putc('z', stderr);
588  if (stagger)
589    putc('u', stderr);
590}
591
// The parsed format section of a table: one entry_format per row and
// column, plus the per-column attributes.
struct format {
  // Number of format rows.
  int nrows;
  // Number of columns.
  int ncolumns;
  // ncolumns - 1 column separations (-1 = not specified); null when
  // there are fewer than two columns.
  int *separation;
  // ncolumns width strings; empty when no width was given.
  string *width;
  // ncolumns flags; non-zero for columns marked with the `e' modifier.
  char *equal;
  // nrows arrays of ncolumns entry formats.
  entry_format **entry;
  // nrows arrays of ncolumns + 1 vertical line counts.
  char **vline;

  format(int nr, int nc);
  ~format();
  // Append n default-initialised rows.
  void add_rows(int n);
};
605
606format::format(int nr, int nc) : nrows(nr), ncolumns(nc)
607{
608  int i;
609  separation = ncolumns > 1 ? new int[ncolumns - 1] : 0;
610  for (i = 0; i < ncolumns-1; i++)
611    separation[i] = -1;
612  width = new string[ncolumns];
613  equal = new char[ncolumns];
614  for (i = 0; i < ncolumns; i++)
615    equal[i] = 0;
616  entry = new entry_format *[nrows];
617  for (i = 0; i < nrows; i++)
618    entry[i] = new entry_format[ncolumns];
619  vline = new char*[nrows];
620  for (i = 0; i < nrows; i++) {
621    vline[i] = new char[ncolumns+1];
622    for (int j = 0; j < ncolumns+1; j++)
623      vline[i][j] = 0;
624  }
625}
626
627void format::add_rows(int n)
628{
629  int i;
630  char **old_vline = vline;
631  vline = new char*[nrows + n];
632  for (i = 0; i < nrows; i++)
633    vline[i] = old_vline[i];
634  a_delete old_vline;
635  for (i = 0; i < n; i++) {
636    vline[nrows + i] = new char[ncolumns + 1];
637    for (int j = 0; j < ncolumns + 1; j++)
638      vline[nrows + i][j] = 0;
639  }
640  entry_format **old_entry = entry;
641  entry = new entry_format *[nrows + n];
642  for (i = 0; i < nrows; i++)
643    entry[i] = old_entry[i];
644  a_delete old_entry;
645  for (i = 0; i < n; i++)
646    entry[nrows + i] = new entry_format[ncolumns];
647  nrows += n;
648}
649
650format::~format()
651{
652  a_delete separation;
653  ad_delete(ncolumns) width;
654  a_delete equal;
655  for (int i = 0; i < nrows; i++) {
656    a_delete vline[i];
657    ad_delete(ncolumns) entry[i];
658  }
659  a_delete vline;
660  a_delete entry;
661}
662
// One key letter of the format section together with its modifiers, as
// collected by process_format() before the format matrix is built.
struct input_entry_format : public entry_format {
  // Next element of the singly linked list.
  input_entry_format *next;
  // Argument of the `w' modifier; empty when not given.
  string width;
  // Column separation given after the key letter; -1 when not given.
  int separation;
  // Number of `|' following the key letter.
  int vline;
  // Number of `|' preceding the key letter.
  int pre_vline;
  // Non-zero when this entry ends a format row.
  int last_column;
  // Count of `e' modifiers seen for this entry.
  int equal;
  input_entry_format(format_type, input_entry_format * = 0);
  ~input_entry_format();
  void debug_print();
};
675
676input_entry_format::input_entry_format(format_type t, input_entry_format *p)
677: entry_format(t), next(p)
678{
679  separation = -1;
680  last_column = 0;
681  vline = 0;
682  pre_vline = 0;
683  equal = 0;
684}
685
686input_entry_format::~input_entry_format()
687{
688}
689
690void free_input_entry_format_list(input_entry_format *list)
691{
692  while (list) {
693    input_entry_format *tem = list;
694    list = list->next;
695    delete tem;
696  }
697}
698
699void input_entry_format::debug_print()
700{
701  int i;
702  for (i = 0; i < pre_vline; i++)
703    putc('|', stderr);
704  entry_format::debug_print();
705  if (!width.empty()) {
706    putc('w', stderr);
707    putc('(', stderr);
708    put_string(width, stderr);
709    putc(')', stderr);
710  }
711  if (equal)
712    putc('e', stderr);
713  if (separation >= 0)
714    fprintf(stderr, "%d", separation);
715  for (i = 0; i < vline; i++)
716    putc('|', stderr);
717  if (last_column)
718    putc(',', stderr);
719}
720
721// Return zero if we should give up on this table.
722// If this is a continuation format line, current_format will be the current
723// format line.
724
725format *process_format(table_input &in, options *opt,
726		       format *current_format = 0)
727{
728  input_entry_format *list = 0;
729  int c = in.get();
730  for (;;) {
731    int pre_vline = 0;
732    int got_format = 0;
733    int got_period = 0;
734    format_type t = FORMAT_LEFT;
735    for (;;) {
736      if (c == EOF) {
737	error("end of input while processing format");
738	free_input_entry_format_list(list);
739	return 0;
740      }
741      switch (c) {
742      case 'n':
743      case 'N':
744	t = FORMAT_NUMERIC;
745	got_format = 1;
746	break;
747      case 'a':
748      case 'A':
749	got_format = 1;
750	t = FORMAT_ALPHABETIC;
751	break;
752      case 'c':
753      case 'C':
754	got_format = 1;
755	t = FORMAT_CENTER;
756	break;
757      case 'l':
758      case 'L':
759	got_format = 1;
760	t = FORMAT_LEFT;
761	break;
762      case 'r':
763      case 'R':
764	got_format = 1;
765	t = FORMAT_RIGHT;
766	break;
767      case 's':
768      case 'S':
769	got_format = 1;
770	t = FORMAT_SPAN;
771	break;
772      case '^':
773	got_format = 1;
774	t = FORMAT_VSPAN;
775	break;
776      case '_':
777      case '-':			// tbl also accepts this
778	got_format = 1;
779	t = FORMAT_HLINE;
780	break;
781      case '=':
782	got_format = 1;
783	t = FORMAT_DOUBLE_HLINE;
784	break;
785      case '.':
786	got_period = 1;
787	break;
788      case '|':
789	pre_vline++;
790	break;
791      case ' ':
792      case '\t':
793      case '\n':
794	break;
795      default:
796	if (c == opt->tab_char)
797	  break;
798	error("unrecognised format `%1'", char(c));
799	free_input_entry_format_list(list);
800	return 0;
801      }
802      if (got_period)
803	break;
804      c = in.get();
805      if (got_format)
806	break;
807    }
808    if (got_period)
809      break;
810    list = new input_entry_format(t, list);
811    if (pre_vline)
812      list->pre_vline = pre_vline;
813    int success = 1;
814    do {
815      switch (c) {
816      case 't':
817      case 'T':
818	c = in.get();
819	list->vertical_alignment = entry_modifier::TOP;
820	break;
821      case 'd':
822      case 'D':
823	c = in.get();
824	list->vertical_alignment = entry_modifier::BOTTOM;
825	break;
826      case 'u':
827      case 'U':
828	c = in.get();
829	list->stagger = 1;
830	break;
831      case 'z':
832      case 'Z':
833	c = in.get();
834	list->zero_width = 1;
835	break;
836      case '0':
837      case '1':
838      case '2':
839      case '3':
840      case '4':
841      case '5':
842      case '6':
843      case '7':
844      case '8':
845      case '9':
846	{
847	  int w = 0;
848	  do {
849	    w = w*10 + (c - '0');
850	    c = in.get();
851	  } while (c != EOF && csdigit(c));
852	  list->separation = w;
853	}
854	break;
855      case 'f':
856      case 'F':
857	do {
858	  c = in.get();
859	} while (c == ' ' || c == '\t');
860	if (c == EOF) {
861	  error("missing font name");
862	  break;
863	}
864	if (c == '(') {
865	  for (;;) {
866	    c = in.get();
867	    if (c == EOF || c == ' ' || c == '\t') {
868	      error("missing `)'");
869	      break;
870	    }
871	    if (c == ')') {
872	      c = in.get();
873	      break;
874	    }
875	    list->font += char(c);
876	  }
877	}
878	else {
879	  list->font = c;
880	  char cc = c;
881	  c = in.get();
882	  if (!csdigit(cc)
883	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
884	    list->font += char(c);
885	    c = in.get();
886	  }
887	}
888	break;
889      case 'v':
890      case 'V':
891	c = in.get();
892	list->vertical_spacing.val = 0;
893	list->vertical_spacing.inc = 0;
894	if (c == '+' || c == '-') {
895	  list->vertical_spacing.inc = (c == '+' ? 1 : -1);
896	  c = in.get();
897	}
898	if (c == EOF || !csdigit(c)) {
899	  error("`v' modifier must be followed by number");
900	  list->vertical_spacing.inc = 0;
901	}
902	else {
903	  do {
904	    list->vertical_spacing.val *= 10;
905	    list->vertical_spacing.val += c - '0';
906	    c = in.get();
907	  } while (c != EOF && csdigit(c));
908	}
909	if (list->vertical_spacing.val > MAX_VERTICAL_SPACING
910	    || list->vertical_spacing.val < -MAX_VERTICAL_SPACING) {
911	  error("unreasonable vertical spacing");
912	  list->vertical_spacing.val = 0;
913	  list->vertical_spacing.inc = 0;
914	}
915	break;
916      case 'p':
917      case 'P':
918	c = in.get();
919	list->point_size.val = 0;
920	list->point_size.inc = 0;
921	if (c == '+' || c == '-') {
922	  list->point_size.inc = (c == '+' ? 1 : -1);
923	  c = in.get();
924	}
925	if (c == EOF || !csdigit(c)) {
926	  error("`p' modifier must be followed by number");
927	  list->point_size.inc = 0;
928	}
929	else {
930	  do {
931	    list->point_size.val *= 10;
932	    list->point_size.val += c - '0';
933	    c = in.get();
934	  } while (c != EOF && csdigit(c));
935	}
936	if (list->point_size.val > MAX_POINT_SIZE
937	    || list->point_size.val < -MAX_POINT_SIZE) {
938	  error("unreasonable point size");
939	  list->point_size.val = 0;
940	  list->point_size.inc = 0;
941	}
942	break;
943      case 'w':
944      case 'W':
945	c = in.get();
946	while (c == ' ' || c == '\t')
947	  c = in.get();
948	if (c == '(') {
949	  list->width = "";
950	  c = in.get();
951	  while (c != ')') {
952	    if (c == EOF || c == '\n') {
953	      error("missing `)'");
954	      free_input_entry_format_list(list);
955	      return 0;
956	    }
957	    list->width += c;
958	    c = in.get();
959	  }
960	  c = in.get();
961	}
962	else {
963	  if (c == '+' || c == '-') {
964	    list->width = char(c);
965	    c = in.get();
966	  }
967	  else
968	    list->width = "";
969	  if (c == EOF || !csdigit(c))
970	    error("bad argument for `w' modifier");
971	  else {
972	    do {
973	      list->width += char(c);
974	      c = in.get();
975	    } while (c != EOF && csdigit(c));
976	  }
977	}
978	break;
979      case 'e':
980      case 'E':
981	c = in.get();
982	list->equal++;
983	break;
984      case '|':
985	c = in.get();
986	list->vline++;
987	break;
988      case 'B':
989      case 'b':
990	c = in.get();
991	list->font = "B";
992	break;
993      case 'I':
994      case 'i':
995	c = in.get();
996	list->font = "I";
997	break;
998      case ' ':
999      case '\t':
1000	c = in.get();
1001	break;
1002      default:
1003	if (c == opt->tab_char)
1004	  c = in.get();
1005	else
1006	  success = 0;
1007	break;
1008      }
1009    } while (success);
1010    if (list->vline > 2) {
1011      list->vline = 2;
1012      error("more than 2 vertical bars between key letters");
1013    }
1014    if (c == '\n' || c == ',') {
1015      c = in.get();
1016      list->last_column = 1;
1017    }
1018  }
1019  if (c == '.') {
1020    do {
1021      c = in.get();
1022    } while (c == ' ' || c == '\t');
1023    if (c != '\n') {
1024      error("`.' not last character on line");
1025      free_input_entry_format_list(list);
1026      return 0;
1027    }
1028  }
1029  if (!list) {
1030    error("no format");
1031    free_input_entry_format_list(list);
1032    return 0;
1033  }
1034  list->last_column = 1;
1035  // now reverse the list so that the first row is at the beginning
1036  input_entry_format *rev = 0;
1037  while (list != 0) {
1038    input_entry_format *tem = list->next;
1039    list->next = rev;
1040    rev = list;
1041    list = tem;
1042  }
1043  list = rev;
1044  input_entry_format *tem;
1045
1046#if 0
1047  for (tem = list; tem; tem = tem->next)
1048    tem->debug_print();
1049  putc('\n', stderr);
1050#endif
1051  // compute number of columns and rows
1052  int ncolumns = 0;
1053  int nrows = 0;
1054  int col = 0;
1055  for (tem = list; tem; tem = tem->next) {
1056    if (tem->last_column) {
1057      if (col >= ncolumns)
1058	ncolumns = col + 1;
1059      col = 0;
1060      nrows++;
1061    }
1062    else
1063      col++;
1064  }
1065  int row;
1066  format *f;
1067  if (current_format) {
1068    if (ncolumns > current_format->ncolumns) {
1069      error("cannot increase the number of columns in a continued format");
1070      free_input_entry_format_list(list);
1071      return 0;
1072    }
1073    f = current_format;
1074    row = f->nrows;
1075    f->add_rows(nrows);
1076  }
1077  else {
1078    f = new format(nrows, ncolumns);
1079    row = 0;
1080  }
1081  col = 0;
1082  for (tem = list; tem; tem = tem->next) {
1083    f->entry[row][col] = *tem;
1084    if (col < ncolumns-1) {
1085      // use the greatest separation
1086      if (tem->separation > f->separation[col]) {
1087	if (current_format)
1088	  error("cannot change column separation in continued format");
1089	else
1090	  f->separation[col] = tem->separation;
1091      }
1092    }
1093    else if (tem->separation >= 0)
1094      error("column separation specified for last column");
1095    if (tem->equal && !f->equal[col]) {
1096      if (current_format)
1097	error("cannot change which columns are equal in continued format");
1098      else
1099	f->equal[col] = 1;
1100    }
1101    if (!tem->width.empty()) {
1102      // use the last width
1103      if (!f->width[col].empty() && f->width[col] != tem->width)
1104	error("multiple widths for column %1", col+1);
1105      f->width[col] = tem->width;
1106    }
1107    if (tem->pre_vline) {
1108      assert(col == 0);
1109      f->vline[row][col] = tem->pre_vline;
1110    }
1111    f->vline[row][col+1] = tem->vline;
1112    if (tem->last_column) {
1113      row++;
1114      col = 0;
1115    }
1116    else
1117      col++;
1118  }
1119  free_input_entry_format_list(list);
1120  for (col = 0; col < ncolumns; col++) {
1121    entry_format *e = f->entry[f->nrows-1] + col;
1122    if (e->type != FORMAT_HLINE
1123	&& e->type != FORMAT_DOUBLE_HLINE
1124	&& e->type != FORMAT_SPAN)
1125      break;
1126  }
1127  if (col >= ncolumns) {
1128    error("last row of format is all lines");
1129    delete f;
1130    return 0;
1131  }
1132  return f;
1133}
1134
1135table *process_data(table_input &in, format *f, options *opt)
1136{
1137  char tab_char = opt->tab_char;
1138  int ncolumns = f->ncolumns;
1139  int current_row = 0;
1140  int format_index = 0;
1141  int give_up = 0;
1142  enum { DATA_INPUT_LINE, TROFF_INPUT_LINE, SINGLE_HLINE, DOUBLE_HLINE } type;
1143  table *tbl = new table(ncolumns, opt->flags, opt->linesize,
1144			 opt->decimal_point_char);
1145  if (opt->delim[0] != '\0')
1146    tbl->set_delim(opt->delim[0], opt->delim[1]);
1147  for (;;) {
1148    // first determine what type of line this is
1149    int c = in.get();
1150    if (c == EOF)
1151      break;
1152    if (c == '.') {
1153      int d = in.get();
1154      if (d != EOF && csdigit(d)) {
1155	in.unget(d);
1156	type = DATA_INPUT_LINE;
1157      }
1158      else {
1159	in.unget(d);
1160	type = TROFF_INPUT_LINE;
1161      }
1162    }
1163    else if (c == '_' || c == '=') {
1164      int d = in.get();
1165      if (d == '\n') {
1166	if (c == '_')
1167	  type = SINGLE_HLINE;
1168	else
1169	  type = DOUBLE_HLINE;
1170      }
1171      else {
1172	in.unget(d);
1173	type = DATA_INPUT_LINE;
1174      }
1175    }
1176    else {
1177      type = DATA_INPUT_LINE;
1178    }
1179    switch (type) {
1180    case DATA_INPUT_LINE:
1181      {
1182	string input_entry;
1183	if (format_index >= f->nrows)
1184	  format_index = f->nrows - 1;
1185	// A format row that is all lines doesn't use up a data line.
1186	while (format_index < f->nrows - 1) {
1187	  int c;
1188	  for (c = 0; c < ncolumns; c++) {
1189	    entry_format *e = f->entry[format_index] + c;
1190	    if (e->type != FORMAT_HLINE
1191		&& e->type != FORMAT_DOUBLE_HLINE
1192		// Unfortunately tbl treats a span as needing data.
1193		// && e->type != FORMAT_SPAN
1194		)
1195	      break;
1196	  }
1197	  if (c < ncolumns)
1198	    break;
1199	  for (c = 0; c < ncolumns; c++)
1200	    tbl->add_entry(current_row, c, input_entry,
1201			   f->entry[format_index] + c, current_filename,
1202			   current_lineno);
1203	  tbl->add_vlines(current_row, f->vline[format_index]);
1204	  format_index++;
1205	  current_row++;
1206	}
1207	entry_format *line_format = f->entry[format_index];
1208	int col = 0;
1209	int row_comment = 0;
1210	for (;;) {
1211	  if (c == tab_char || c == '\n') {
1212	    int ln = current_lineno;
1213	    if (c == '\n')
1214	      --ln;
1215	    if ((opt->flags & table::NOSPACES))
1216	      input_entry.remove_spaces();
1217	    while (col < ncolumns
1218		   && line_format[col].type == FORMAT_SPAN) {
1219	      tbl->add_entry(current_row, col, "", &line_format[col],
1220			     current_filename, ln);
1221	      col++;
1222	    }
1223	    if (c == '\n' && input_entry.length() == 2
1224		&& input_entry[0] == 'T' && input_entry[1] == '{') {
1225	      input_entry = "";
1226	      ln++;
1227	      enum {
1228		START, MIDDLE, GOT_T, GOT_RIGHT_BRACE, GOT_DOT,
1229		GOT_l, GOT_lf, END
1230	      } state = START;
1231	      while (state != END) {
1232		c = in.get();
1233		if (c == EOF)
1234		  break;
1235		switch (state) {
1236		case START:
1237		  if (c == 'T')
1238		    state = GOT_T;
1239		  else if (c == '.')
1240		    state = GOT_DOT;
1241		  else {
1242		    input_entry += c;
1243		    if (c != '\n')
1244		      state = MIDDLE;
1245		  }
1246		  break;
1247		case GOT_T:
1248		  if (c == '}')
1249		    state = GOT_RIGHT_BRACE;
1250		  else {
1251		    input_entry += 'T';
1252		    input_entry += c;
1253		    state = c == '\n' ? START : MIDDLE;
1254		  }
1255		  break;
1256		case GOT_DOT:
1257		  if (c == 'l')
1258		    state = GOT_l;
1259		  else {
1260		    input_entry += '.';
1261		    input_entry += c;
1262		    state = c == '\n' ? START : MIDDLE;
1263		  }
1264		  break;
1265		case GOT_l:
1266		  if (c == 'f')
1267		    state = GOT_lf;
1268		  else {
1269		    input_entry += ".l";
1270		    input_entry += c;
1271		    state = c == '\n' ? START : MIDDLE;
1272		  }
1273		  break;
1274		case GOT_lf:
1275		  if (c == ' ' || c == '\n' || compatible_flag) {
1276		    string args;
1277		    input_entry += ".lf";
1278		    while (c != EOF) {
1279		      args += c;
1280		      if (c == '\n')
1281			break;
1282		      c = in.get();
1283		    }
1284		    args += '\0';
1285		    interpret_lf_args(args.contents());
1286		    // remove the '\0'
1287		    args.set_length(args.length() - 1);
1288		    input_entry += args;
1289		    state = START;
1290		  }
1291		  else {
1292		    input_entry += ".lf";
1293		    input_entry += c;
1294		    state = MIDDLE;
1295		  }
1296		  break;
1297		case GOT_RIGHT_BRACE:
1298		  if (c == '\n' || c == tab_char)
1299		    state = END;
1300		  else {
1301		    input_entry += 'T';
1302		    input_entry += '}';
1303		    input_entry += c;
1304		    state = c == '\n' ? START : MIDDLE;
1305		  }
1306		  break;
1307		case MIDDLE:
1308		  if (c == '\n')
1309		    state = START;
1310		  input_entry += c;
1311		  break;
1312		case END:
1313		default:
1314		  assert(0);
1315		}
1316	      }
1317	      if (c == EOF) {
1318		error("end of data in middle of text block");
1319		give_up = 1;
1320		break;
1321	      }
1322	    }
1323	    if (col >= ncolumns) {
1324	      if (!input_entry.empty()) {
1325		if (input_entry.length() >= 2
1326		    && input_entry[0] == '\\'
1327		    && input_entry[1] == '"')
1328		  row_comment = 1;
1329		else if (!row_comment) {
1330		  if (c == '\n')
1331		    in.unget(c);
1332		  input_entry += '\0';
1333		  error("excess data entry `%1' discarded",
1334			input_entry.contents());
1335		  if (c == '\n')
1336		    (void)in.get();
1337		}
1338	      }
1339	    }
1340	    else
1341	      tbl->add_entry(current_row, col, input_entry,
1342			     &line_format[col], current_filename, ln);
1343	    col++;
1344	    if (c == '\n')
1345	      break;
1346	    input_entry = "";
1347	  }
1348	  else
1349	    input_entry += c;
1350	  c = in.get();
1351	  if (c == EOF)
1352	    break;
1353	}
1354	if (give_up)
1355	  break;
1356	input_entry = "";
1357	for (; col < ncolumns; col++)
1358	  tbl->add_entry(current_row, col, input_entry, &line_format[col],
1359			 current_filename, current_lineno - 1);
1360	tbl->add_vlines(current_row, f->vline[format_index]);
1361	current_row++;
1362	format_index++;
1363      }
1364      break;
1365    case TROFF_INPUT_LINE:
1366      {
1367	string line;
1368	int ln = current_lineno;
1369	for (;;) {
1370	  line += c;
1371	  if (c == '\n')
1372	    break;
1373	  c = in.get();
1374	  if (c == EOF) {
1375	    break;
1376	  }
1377	}
1378	tbl->add_text_line(current_row, line, current_filename, ln);
1379	if (line.length() >= 4
1380	    && line[0] == '.' && line[1] == 'T' && line[2] == '&') {
1381	  format *newf = process_format(in, opt, f);
1382	  if (newf == 0)
1383	    give_up = 1;
1384	  else
1385	    f = newf;
1386	}
1387	if (line.length() >= 3
1388	    && line[0] == '.' && line[1] == 'l' && line[2] == 'f') {
1389	  line += '\0';
1390	  interpret_lf_args(line.contents() + 3);
1391	}
1392      }
1393      break;
1394    case SINGLE_HLINE:
1395      tbl->add_single_hline(current_row);
1396      break;
1397    case DOUBLE_HLINE:
1398      tbl->add_double_hline(current_row);
1399      break;
1400    default:
1401      assert(0);
1402    }
1403    if (give_up)
1404      break;
1405  }
1406  if (!give_up && current_row == 0) {
1407    error("no real data");
1408    give_up = 1;
1409  }
1410  if (give_up) {
1411    delete tbl;
1412    return 0;
1413  }
1414  // Do this here rather than at the beginning in case continued formats
1415  // change it.
1416  int i;
1417  for (i = 0; i < ncolumns - 1; i++)
1418    if (f->separation[i] >= 0)
1419      tbl->set_column_separation(i, f->separation[i]);
1420  for (i = 0; i < ncolumns; i++)
1421    if (!f->width[i].empty())
1422      tbl->set_minimum_width(i, f->width[i]);
1423  for (i = 0; i < ncolumns; i++)
1424    if (f->equal[i])
1425      tbl->set_equal_column(i);
1426  return tbl;
1427}
1428
1429void process_table(table_input &in)
1430{
1431  int c;
1432  options *opt = 0;
1433  format *form = 0;
1434  table *tbl = 0;
1435  if ((opt = process_options(in)) != 0
1436      && (form = process_format(in, opt)) != 0
1437      && (tbl = process_data(in, form, opt)) != 0) {
1438    tbl->print();
1439    delete tbl;
1440  }
1441  else {
1442    error("giving up on this table");
1443    while ((c = in.get()) != EOF)
1444      ;
1445  }
1446  delete opt;
1447  delete form;
1448  if (!in.ended())
1449    error("premature end of file");
1450}
1451
1452static void usage(FILE *stream)
1453{
1454  fprintf(stream, "usage: %s [ -vC ] [ files... ]\n", program_name);
1455}
1456
1457int main(int argc, char **argv)
1458{
1459  program_name = argv[0];
1460  static char stderr_buf[BUFSIZ];
1461  setbuf(stderr, stderr_buf);
1462  int opt;
1463  static const struct option long_options[] = {
1464    { "help", no_argument, 0, CHAR_MAX + 1 },
1465    { "version", no_argument, 0, 'v' },
1466    { NULL, 0, 0, 0 }
1467  };
1468  while ((opt = getopt_long(argc, argv, "vCT:", long_options, NULL)) != EOF)
1469    switch (opt) {
1470    case 'C':
1471      compatible_flag = 1;
1472      break;
1473    case 'v':
1474      {
1475	printf("GNU tbl (groff) version %s\n", Version_string);
1476	exit(0);
1477	break;
1478      }
1479    case 'T':
1480      // I'm sick of getting bug reports from IRIX users
1481      break;
1482    case CHAR_MAX + 1: // --help
1483      usage(stdout);
1484      exit(0);
1485      break;
1486    case '?':
1487      usage(stderr);
1488      exit(1);
1489      break;
1490    default:
1491      assert(0);
1492    }
1493  printf(".if !\\n(.g .ab GNU tbl requires GNU troff.\n"
1494	 ".if !dTS .ds TS\n"
1495	 ".if !dTE .ds TE\n");
1496  if (argc > optind) {
1497    for (int i = optind; i < argc; i++)
1498      if (argv[i][0] == '-' && argv[i][1] == '\0') {
1499	current_filename = "-";
1500	current_lineno = 1;
1501	printf(".lf 1 -\n");
1502	process_input_file(stdin);
1503      }
1504      else {
1505	errno = 0;
1506	FILE *fp = fopen(argv[i], "r");
1507	if (fp == 0) {
1508	  current_lineno = -1;
1509	  error("can't open `%1': %2", argv[i], strerror(errno));
1510	}
1511	else {
1512	  current_lineno = 1;
1513	  current_filename = argv[i];
1514	  printf(".lf 1 %s\n", current_filename);
1515	  process_input_file(fp);
1516	}
1517      }
1518  }
1519  else {
1520    current_filename = "-";
1521    current_lineno = 1;
1522    printf(".lf 1 -\n");
1523    process_input_file(stdin);
1524  }
1525  if (ferror(stdout) || fflush(stdout) < 0)
1526    fatal("output error");
1527  return 0;
1528}
1529
1530