1/* unexpand - convert blanks to tabs
2   Copyright (C) 1989, 1991, 1995-2006, 2008-2010 Free Software Foundation,
3   Inc.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation, either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18/* By default, convert only maximal strings of initial blanks and tabs
19   into tabs.
20   Preserves backspace characters in the output; they decrement the
21   column count for tab calculations.
22   The default action is equivalent to -8.
23
24   Options:
25   --tabs=tab1[,tab2[,...]]
26   -t tab1[,tab2[,...]]
27   -tab1[,tab2[,...]]	If only one tab stop is given, set the tabs tab1
28                        columns apart instead of the default 8.  Otherwise,
29                        set the tabs at columns tab1, tab2, etc. (numbered from
30                        0); preserve any blanks beyond the tab stops given.
31   --all
32   -a			Use tabs wherever they would replace 2 or more blanks,
33                        not just at the beginnings of lines.
34
35   David MacKenzie <djm@gnu.ai.mit.edu> */
36
37#include <config.h>
38
39#include <stdio.h>
40#include <getopt.h>
41#include <sys/types.h>
42#include "system.h"
43#include "error.h"
44#include "quote.h"
45#include "xstrndup.h"
46
/* The official name of this program, i.e. without any installation
   prefix such as `g'.  */
#define PROGRAM_NAME "unexpand"

#define AUTHORS proper_name ("David MacKenzie")

/* Input file bookkeeping.  */

/* Null-terminated vector of the names of the input files.  */
static char **file_list;

/* Vector substituted for `file_list' when the command line names no
   files: it makes standard input the only input.  */
static char *stdin_argv[] = { (char *) "-", NULL };

/* True once standard input has been selected as an input file.  */
static bool have_read_stdin;

/* Exit status to report when the program terminates.  */
static int exit_status;

/* Conversion mode.  */

/* If true, blanks are converted everywhere on a line, not only in the
   run of blanks that starts the line.  */
static bool convert_entire_line;

/* Tab stop configuration.  */

/* Distance between evenly spaced tab stops, or zero when the explicit
   positions stored in `tab_list' are to be used instead.  */
static size_t tab_size;

/* The widest gap between two consecutive tab stops.  */
static size_t max_column_width;

/* Explicit tab stop columns (the first column is column 0).  Once a
   line has passed the last entry, the rest of it is copied unchanged.  */
static uintmax_t *tab_list;

/* Number of entries for which `tab_list' has room.  */
static size_t n_tabs_allocated;

/* Index of the first unused slot of `tab_list', which is also the
   number of tab stops stored so far.  */
static size_t first_free_tab;

/* Long options without a short equivalent are identified by values
   that no character can have, counting up from CHAR_MAX + 1.  */
enum
{
  CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
};
95
96static struct option const longopts[] =
97{
98  {"tabs", required_argument, NULL, 't'},
99  {"all", no_argument, NULL, 'a'},
100  {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
101  {GETOPT_HELP_OPTION_DECL},
102  {GETOPT_VERSION_OPTION_DECL},
103  {NULL, 0, NULL, 0}
104};
105
106void
107usage (int status)
108{
109  if (status != EXIT_SUCCESS)
110    fprintf (stderr, _("Try `%s --help' for more information.\n"),
111             program_name);
112  else
113    {
114      printf (_("\
115Usage: %s [OPTION]... [FILE]...\n\
116"),
117              program_name);
118      fputs (_("\
119Convert blanks in each FILE to tabs, writing to standard output.\n\
120With no FILE, or when FILE is -, read standard input.\n\
121\n\
122"), stdout);
123      fputs (_("\
124Mandatory arguments to long options are mandatory for short options too.\n\
125"), stdout);
126      fputs (_("\
127  -a, --all        convert all blanks, instead of just initial blanks\n\
128      --first-only  convert only leading sequences of blanks (overrides -a)\n\
129  -t, --tabs=N     have tabs N characters apart instead of 8 (enables -a)\n\
130  -t, --tabs=LIST  use comma separated LIST of tab positions (enables -a)\n\
131"), stdout);
132      fputs (HELP_OPTION_DESCRIPTION, stdout);
133      fputs (VERSION_OPTION_DESCRIPTION, stdout);
134      emit_ancillary_info ();
135    }
136  exit (status);
137}
138
139/* Add tab stop TABVAL to the end of `tab_list'.  */
140
141static void
142add_tab_stop (uintmax_t tabval)
143{
144  uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
145  uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
146
147  if (first_free_tab == n_tabs_allocated)
148    tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
149  tab_list[first_free_tab++] = tabval;
150
151  if (max_column_width < column_width)
152    {
153      if (SIZE_MAX < column_width)
154        error (EXIT_FAILURE, 0, _("tabs are too far apart"));
155      max_column_width = column_width;
156    }
157}
158
159/* Add the comma or blank separated list of tab stops STOPS
160   to the list of tab stops.  */
161
162static void
163parse_tab_stops (char const *stops)
164{
165  bool have_tabval = false;
166  uintmax_t tabval IF_LINT (= 0);
167  char const *num_start IF_LINT (= NULL);
168  bool ok = true;
169
170  for (; *stops; stops++)
171    {
172      if (*stops == ',' || isblank (to_uchar (*stops)))
173        {
174          if (have_tabval)
175            add_tab_stop (tabval);
176          have_tabval = false;
177        }
178      else if (ISDIGIT (*stops))
179        {
180          if (!have_tabval)
181            {
182              tabval = 0;
183              have_tabval = true;
184              num_start = stops;
185            }
186
187          /* Detect overflow.  */
188          if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
189            {
190              size_t len = strspn (num_start, "0123456789");
191              char *bad_num = xstrndup (num_start, len);
192              error (0, 0, _("tab stop is too large %s"), quote (bad_num));
193              free (bad_num);
194              ok = false;
195              stops = num_start + len - 1;
196            }
197        }
198      else
199        {
200          error (0, 0, _("tab size contains invalid character(s): %s"),
201                 quote (stops));
202          ok = false;
203          break;
204        }
205    }
206
207  if (!ok)
208    exit (EXIT_FAILURE);
209
210  if (have_tabval)
211    add_tab_stop (tabval);
212}
213
/* Verify that the ENTRIES tab stops stored in TABS are all nonzero
   and strictly increasing.  Exit with a diagnostic at the first stop
   that violates either rule.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t previous = 0;
  size_t idx = 0;

  while (idx < entries)
    {
      uintmax_t stop = tabs[idx];

      if (stop == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (stop <= previous)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));

      previous = stop;
      idx++;
    }
}
232
233/* Close the old stream pointer FP if it is non-NULL,
234   and return a new one opened to read the next input file.
235   Open a filename of `-' as the standard input.
236   Return NULL if there are no more input files.  */
237
238static FILE *
239next_file (FILE *fp)
240{
241  static char *prev_file;
242  char *file;
243
244  if (fp)
245    {
246      if (ferror (fp))
247        {
248          error (0, errno, "%s", prev_file);
249          exit_status = EXIT_FAILURE;
250        }
251      if (STREQ (prev_file, "-"))
252        clearerr (fp);		/* Also clear EOF.  */
253      else if (fclose (fp) != 0)
254        {
255          error (0, errno, "%s", prev_file);
256          exit_status = EXIT_FAILURE;
257        }
258    }
259
260  while ((file = *file_list++) != NULL)
261    {
262      if (STREQ (file, "-"))
263        {
264          have_read_stdin = true;
265          prev_file = file;
266          return stdin;
267        }
268      fp = fopen (file, "r");
269      if (fp)
270        {
271          prev_file = file;
272          return fp;
273        }
274      error (0, errno, "%s", file);
275      exit_status = EXIT_FAILURE;
276    }
277  return NULL;
278}
279
280/* Change blanks to tabs, writing to stdout.
281   Read each file in `file_list', in order.  */
282
283static void
284unexpand (void)
285{
286  /* Input stream.  */
287  FILE *fp = next_file (NULL);
288
289  /* The array of pending blanks.  In non-POSIX locales, blanks can
290     include characters other than spaces, so the blanks must be
291     stored, not merely counted.  */
292  char *pending_blank;
293
294  if (!fp)
295    return;
296
297  /* The worst case is a non-blank character, then one blank, then a
298     tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
299     allocate MAX_COLUMN_WIDTH bytes to store the blanks.  */
300  pending_blank = xmalloc (max_column_width);
301
302  for (;;)
303    {
304      /* Input character, or EOF.  */
305      int c;
306
307      /* If true, perform translations.  */
308      bool convert = true;
309
310
311      /* The following variables have valid values only when CONVERT
312         is true:  */
313
314      /* Column of next input character.  */
315      uintmax_t column = 0;
316
317      /* Column the next input tab stop is on.  */
318      uintmax_t next_tab_column = 0;
319
320      /* Index in TAB_LIST of next tab stop to examine.  */
321      size_t tab_index = 0;
322
323      /* If true, the first pending blank came just before a tab stop.  */
324      bool one_blank_before_tab_stop = false;
325
326      /* If true, the previous input character was a blank.  This is
327         initially true, since initial strings of blanks are treated
328         as if the line was preceded by a blank.  */
329      bool prev_blank = true;
330
331      /* Number of pending columns of blanks.  */
332      size_t pending = 0;
333
334
335      /* Convert a line of text.  */
336
337      do
338        {
339          while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
340            continue;
341
342          if (convert)
343            {
344              bool blank = !! isblank (c);
345
346              if (blank)
347                {
348                  if (next_tab_column <= column)
349                    {
350                      if (tab_size)
351                        next_tab_column =
352                          column + (tab_size - column % tab_size);
353                      else
354                        for (;;)
355                          if (tab_index == first_free_tab)
356                            {
357                              convert = false;
358                              break;
359                            }
360                          else
361                            {
362                              uintmax_t tab = tab_list[tab_index++];
363                              if (column < tab)
364                                {
365                                  next_tab_column = tab;
366                                  break;
367                                }
368                            }
369                    }
370
371                  if (convert)
372                    {
373                      if (next_tab_column < column)
374                        error (EXIT_FAILURE, 0, _("input line is too long"));
375
376                      if (c == '\t')
377                        {
378                          column = next_tab_column;
379
380                          /* Discard pending blanks, unless it was a single
381                             blank just before the previous tab stop.  */
382                          if (! (pending == 1 && one_blank_before_tab_stop))
383                            {
384                              pending = 0;
385                              one_blank_before_tab_stop = false;
386                            }
387                        }
388                      else
389                        {
390                          column++;
391
392                          if (! (prev_blank && column == next_tab_column))
393                            {
394                              /* It is not yet known whether the pending blanks
395                                 will be replaced by tabs.  */
396                              if (column == next_tab_column)
397                                one_blank_before_tab_stop = true;
398                              pending_blank[pending++] = c;
399                              prev_blank = true;
400                              continue;
401                            }
402
403                          /* Replace the pending blanks by a tab or two.  */
404                          pending_blank[0] = c = '\t';
405                          pending = one_blank_before_tab_stop;
406                        }
407                    }
408                }
409              else if (c == '\b')
410                {
411                  /* Go back one column, and force recalculation of the
412                     next tab stop.  */
413                  column -= !!column;
414                  next_tab_column = column;
415                  tab_index -= !!tab_index;
416                }
417              else
418                {
419                  column++;
420                  if (!column)
421                    error (EXIT_FAILURE, 0, _("input line is too long"));
422                }
423
424              if (pending)
425                {
426                  if (fwrite (pending_blank, 1, pending, stdout) != pending)
427                    error (EXIT_FAILURE, errno, _("write error"));
428                  pending = 0;
429                  one_blank_before_tab_stop = false;
430                }
431
432              prev_blank = blank;
433              convert &= convert_entire_line || blank;
434            }
435
436          if (c < 0)
437            {
438              free (pending_blank);
439              return;
440            }
441
442          if (putchar (c) < 0)
443            error (EXIT_FAILURE, errno, _("write error"));
444        }
445      while (c != '\n');
446    }
447}
448
449int
450main (int argc, char **argv)
451{
452  bool have_tabval = false;
453  uintmax_t tabval IF_LINT (= 0);
454  int c;
455
456  /* If true, cancel the effect of any -a (explicit or implicit in -t),
457     so that only leading blanks will be considered.  */
458  bool convert_first_only = false;
459
460  initialize_main (&argc, &argv);
461  set_program_name (argv[0]);
462  setlocale (LC_ALL, "");
463  bindtextdomain (PACKAGE, LOCALEDIR);
464  textdomain (PACKAGE);
465
466  atexit (close_stdout);
467
468  have_read_stdin = false;
469  exit_status = EXIT_SUCCESS;
470  convert_entire_line = false;
471  tab_list = NULL;
472  first_free_tab = 0;
473
474  while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
475         != -1)
476    {
477      switch (c)
478        {
479        case '?':
480          usage (EXIT_FAILURE);
481        case 'a':
482          convert_entire_line = true;
483          break;
484        case 't':
485          convert_entire_line = true;
486          parse_tab_stops (optarg);
487          break;
488        case CONVERT_FIRST_ONLY_OPTION:
489          convert_first_only = true;
490          break;
491        case ',':
492          if (have_tabval)
493            add_tab_stop (tabval);
494          have_tabval = false;
495          break;
496        case_GETOPT_HELP_CHAR;
497        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
498        default:
499          if (!have_tabval)
500            {
501              tabval = 0;
502              have_tabval = true;
503            }
504          if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
505            error (EXIT_FAILURE, 0, _("tab stop value is too large"));
506          break;
507        }
508    }
509
510  if (convert_first_only)
511    convert_entire_line = false;
512
513  if (have_tabval)
514    add_tab_stop (tabval);
515
516  validate_tab_stops (tab_list, first_free_tab);
517
518  if (first_free_tab == 0)
519    tab_size = max_column_width = 8;
520  else if (first_free_tab == 1)
521    tab_size = tab_list[0];
522  else
523    tab_size = 0;
524
525  file_list = (optind < argc ? &argv[optind] : stdin_argv);
526
527  unexpand ();
528
529  if (have_read_stdin && fclose (stdin) != 0)
530    error (EXIT_FAILURE, errno, "-");
531
532  exit (exit_status);
533}
534