1/* expand - convert tabs to spaces
2   Copyright (C) 1989, 1991, 1995-2006, 2008-2010 Free Software Foundation,
3   Inc.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation, either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18/* By default, convert all tabs to spaces.
19   Preserves backspace characters in the output; they decrement the
20   column count for tab calculations.
21   The default action is equivalent to -8.
22
23   Options:
24   --tabs=tab1[,tab2[,...]]
25   -t tab1[,tab2[,...]]
26   -tab1[,tab2[,...]]	If only one tab stop is given, set the tabs tab1
27                        columns apart instead of the default 8.  Otherwise,
28                        set the tabs at columns tab1, tab2, etc. (numbered from
29                        0); replace any tabs beyond the tab stops given with
30                        single spaces.
31   --initial
32   -i			Only convert initial tabs on each line to spaces.
33
34   David MacKenzie <djm@gnu.ai.mit.edu> */
35
36#include <config.h>
37
38#include <stdio.h>
39#include <getopt.h>
40#include <sys/types.h>
41#include "system.h"
42#include "error.h"
43#include "quote.h"
44#include "xstrndup.h"
45
46/* The official name of this program (e.g., no `g' prefix).  */
47#define PROGRAM_NAME "expand"
48
49#define AUTHORS proper_name ("David MacKenzie")
50
/* If true, convert blanks even after nonblank characters have been
   read on the line.  main sets this to true by default and the -i
   option clears it; expand consults it after every character.  */
static bool convert_entire_line;

/* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead.
   main chooses the value after option parsing: 8 when no stop was
   given, the single stop when exactly one was given, 0 otherwise.  */
static uintmax_t tab_size;

/* Array of the explicit column numbers of the tab stops;
   after `tab_list' is exhausted, each additional tab is replaced
   by a space.  The first column is column 0.
   Entries are appended by add_tab_stop, which grows the array on
   demand.  */
static uintmax_t *tab_list;

/* The number of allocated entries in `tab_list'.  */
static size_t n_tabs_allocated;

/* The index of the first invalid element of `tab_list',
   where the next element can be added.  This is also the number of
   tab stops collected so far.  */
static size_t first_free_tab;

/* Null-terminated array of input filenames.  next_file advances this
   pointer as it consumes names.  */
static char **file_list;

/* Default for `file_list' if no files are given on the command line.  */
static char *stdin_argv[] =
{
  (char *) "-", NULL
};

/* True if we have ever read standard input.  Set by next_file when it
   hands out stdin; main uses it to decide whether to close stdin.  */
static bool have_read_stdin;

/* The desired exit status.  Starts as EXIT_SUCCESS in main and is
   switched to EXIT_FAILURE by next_file when an input file cannot be
   opened, read or closed.  */
static int exit_status;
84
/* Short option string handed to getopt_long.  `i' takes no argument
   and `t' requires one.  Each digit `0'..`9' is declared with `::'
   (optional argument) so that the remainder of an old-style
   -N[,M...] tab specification is delivered in optarg; see the digit
   cases in main.  */
static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";

/* Long option table.  --tabs and --initial are mapped onto the short
   option characters `t' and `i', so main handles both spellings with
   the same switch cases.  The table ends with an all-zero entry.  */
static struct option const longopts[] =
{
  {"tabs", required_argument, NULL, 't'},
  {"initial", no_argument, NULL, 'i'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};
95
96void
97usage (int status)
98{
99  if (status != EXIT_SUCCESS)
100    fprintf (stderr, _("Try `%s --help' for more information.\n"),
101             program_name);
102  else
103    {
104      printf (_("\
105Usage: %s [OPTION]... [FILE]...\n\
106"),
107              program_name);
108      fputs (_("\
109Convert tabs in each FILE to spaces, writing to standard output.\n\
110With no FILE, or when FILE is -, read standard input.\n\
111\n\
112"), stdout);
113      fputs (_("\
114Mandatory arguments to long options are mandatory for short options too.\n\
115"), stdout);
116      fputs (_("\
117  -i, --initial       do not convert tabs after non blanks\n\
118  -t, --tabs=NUMBER   have tabs NUMBER characters apart, not 8\n\
119"), stdout);
120      fputs (_("\
121  -t, --tabs=LIST     use comma separated list of explicit tab positions\n\
122"), stdout);
123      fputs (HELP_OPTION_DESCRIPTION, stdout);
124      fputs (VERSION_OPTION_DESCRIPTION, stdout);
125      emit_ancillary_info ();
126    }
127  exit (status);
128}
129
130/* Add tab stop TABVAL to the end of `tab_list'.  */
131
132static void
133add_tab_stop (uintmax_t tabval)
134{
135  if (first_free_tab == n_tabs_allocated)
136    tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
137  tab_list[first_free_tab++] = tabval;
138}
139
140/* Add the comma or blank separated list of tab stops STOPS
141   to the list of tab stops.  */
142
143static void
144parse_tab_stops (char const *stops)
145{
146  bool have_tabval = false;
147  uintmax_t tabval IF_LINT (= 0);
148  char const *num_start IF_LINT (= NULL);
149  bool ok = true;
150
151  for (; *stops; stops++)
152    {
153      if (*stops == ',' || isblank (to_uchar (*stops)))
154        {
155          if (have_tabval)
156            add_tab_stop (tabval);
157          have_tabval = false;
158        }
159      else if (ISDIGIT (*stops))
160        {
161          if (!have_tabval)
162            {
163              tabval = 0;
164              have_tabval = true;
165              num_start = stops;
166            }
167
168          /* Detect overflow.  */
169          if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
170            {
171              size_t len = strspn (num_start, "0123456789");
172              char *bad_num = xstrndup (num_start, len);
173              error (0, 0, _("tab stop is too large %s"), quote (bad_num));
174              free (bad_num);
175              ok = false;
176              stops = num_start + len - 1;
177            }
178        }
179      else
180        {
181          error (0, 0, _("tab size contains invalid character(s): %s"),
182                 quote (stops));
183          ok = false;
184          break;
185        }
186    }
187
188  if (!ok)
189    exit (EXIT_FAILURE);
190
191  if (have_tabval)
192    add_tab_stop (tabval);
193}
194
/* Verify that the ENTRIES tab stops stored in TABS are all nonzero
   and strictly increasing.  The first violation found is reported
   and the program exits with EXIT_FAILURE; an empty list is
   accepted.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t previous = 0;
  size_t idx = 0;

  while (idx < entries)
    {
      uintmax_t const stop = tabs[idx++];

      if (stop == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (stop <= previous)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));

      previous = stop;
    }
}
213
214/* Close the old stream pointer FP if it is non-NULL,
215   and return a new one opened to read the next input file.
216   Open a filename of `-' as the standard input.
217   Return NULL if there are no more input files.  */
218
219static FILE *
220next_file (FILE *fp)
221{
222  static char *prev_file;
223  char *file;
224
225  if (fp)
226    {
227      if (ferror (fp))
228        {
229          error (0, errno, "%s", prev_file);
230          exit_status = EXIT_FAILURE;
231        }
232      if (STREQ (prev_file, "-"))
233        clearerr (fp);		/* Also clear EOF.  */
234      else if (fclose (fp) != 0)
235        {
236          error (0, errno, "%s", prev_file);
237          exit_status = EXIT_FAILURE;
238        }
239    }
240
241  while ((file = *file_list++) != NULL)
242    {
243      if (STREQ (file, "-"))
244        {
245          have_read_stdin = true;
246          prev_file = file;
247          return stdin;
248        }
249      fp = fopen (file, "r");
250      if (fp)
251        {
252          prev_file = file;
253          return fp;
254        }
255      error (0, errno, "%s", file);
256      exit_status = EXIT_FAILURE;
257    }
258  return NULL;
259}
260
261/* Change tabs to spaces, writing to stdout.
262   Read each file in `file_list', in order.  */
263
264static void
265expand (void)
266{
267  /* Input stream.  */
268  FILE *fp = next_file (NULL);
269
270  if (!fp)
271    return;
272
273  for (;;)
274    {
275      /* Input character, or EOF.  */
276      int c;
277
278      /* If true, perform translations.  */
279      bool convert = true;
280
281
282      /* The following variables have valid values only when CONVERT
283         is true:  */
284
285      /* Column of next input character.  */
286      uintmax_t column = 0;
287
288      /* Index in TAB_LIST of next tab stop to examine.  */
289      size_t tab_index = 0;
290
291
292      /* Convert a line of text.  */
293
294      do
295        {
296          while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
297            continue;
298
299          if (convert)
300            {
301              if (c == '\t')
302                {
303                  /* Column the next input tab stop is on.  */
304                  uintmax_t next_tab_column;
305
306                  if (tab_size)
307                    next_tab_column = column + (tab_size - column % tab_size);
308                  else
309                    for (;;)
310                      if (tab_index == first_free_tab)
311                        {
312                          next_tab_column = column + 1;
313                          break;
314                        }
315                      else
316                        {
317                          uintmax_t tab = tab_list[tab_index++];
318                          if (column < tab)
319                            {
320                              next_tab_column = tab;
321                              break;
322                            }
323                        }
324
325                  if (next_tab_column < column)
326                    error (EXIT_FAILURE, 0, _("input line is too long"));
327
328                  while (++column < next_tab_column)
329                    if (putchar (' ') < 0)
330                      error (EXIT_FAILURE, errno, _("write error"));
331
332                  c = ' ';
333                }
334              else if (c == '\b')
335                {
336                  /* Go back one column, and force recalculation of the
337                     next tab stop.  */
338                  column -= !!column;
339                  tab_index -= !!tab_index;
340                }
341              else
342                {
343                  column++;
344                  if (!column)
345                    error (EXIT_FAILURE, 0, _("input line is too long"));
346                }
347
348              convert &= convert_entire_line || !! isblank (c);
349            }
350
351          if (c < 0)
352            return;
353
354          if (putchar (c) < 0)
355            error (EXIT_FAILURE, errno, _("write error"));
356        }
357      while (c != '\n');
358    }
359}
360
361int
362main (int argc, char **argv)
363{
364  int c;
365
366  initialize_main (&argc, &argv);
367  set_program_name (argv[0]);
368  setlocale (LC_ALL, "");
369  bindtextdomain (PACKAGE, LOCALEDIR);
370  textdomain (PACKAGE);
371
372  atexit (close_stdout);
373
374  have_read_stdin = false;
375  exit_status = EXIT_SUCCESS;
376  convert_entire_line = true;
377  tab_list = NULL;
378  first_free_tab = 0;
379
380  while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
381    {
382      switch (c)
383        {
384        case 'i':
385          convert_entire_line = false;
386          break;
387
388        case 't':
389          parse_tab_stops (optarg);
390          break;
391
392        case '0': case '1': case '2': case '3': case '4':
393        case '5': case '6': case '7': case '8': case '9':
394          if (optarg)
395            parse_tab_stops (optarg - 1);
396          else
397            {
398              char tab_stop[2];
399              tab_stop[0] = c;
400              tab_stop[1] = '\0';
401              parse_tab_stops (tab_stop);
402            }
403          break;
404
405        case_GETOPT_HELP_CHAR;
406
407        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
408
409        default:
410          usage (EXIT_FAILURE);
411        }
412    }
413
414  validate_tab_stops (tab_list, first_free_tab);
415
416  if (first_free_tab == 0)
417    tab_size = 8;
418  else if (first_free_tab == 1)
419    tab_size = tab_list[0];
420  else
421    tab_size = 0;
422
423  file_list = (optind < argc ? &argv[optind] : stdin_argv);
424
425  expand ();
426
427  if (have_read_stdin && fclose (stdin) != 0)
428    error (EXIT_FAILURE, errno, "-");
429
430  exit (exit_status);
431}
432