1/* unexpand - convert blanks to tabs 2 Copyright (C) 1989, 1991, 1995-2006, 2008-2010 Free Software Foundation, 3 Inc. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18/* By default, convert only maximal strings of initial blanks and tabs 19 into tabs. 20 Preserves backspace characters in the output; they decrement the 21 column count for tab calculations. 22 The default action is equivalent to -8. 23 24 Options: 25 --tabs=tab1[,tab2[,...]] 26 -t tab1[,tab2[,...]] 27 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 28 columns apart instead of the default 8. Otherwise, 29 set the tabs at columns tab1, tab2, etc. (numbered from 30 0); preserve any blanks beyond the tab stops given. 31 --all 32 -a Use tabs wherever they would replace 2 or more blanks, 33 not just at the beginnings of lines. 34 35 David MacKenzie <djm@gnu.ai.mit.edu> */ 36 37#include <config.h> 38 39#include <stdio.h> 40#include <getopt.h> 41#include <sys/types.h> 42#include "system.h" 43#include "error.h" 44#include "quote.h" 45#include "xstrndup.h" 46 47/* The official name of this program (e.g., no `g' prefix). */ 48#define PROGRAM_NAME "unexpand" 49 50#define AUTHORS proper_name ("David MacKenzie") 51 52/* If true, convert blanks even after nonblank characters have been 53 read on the line. */ 54static bool convert_entire_line; 55 56/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ 57static size_t tab_size; 58 59/* The maximum distance between tab stops. */ 60static size_t max_column_width; 61 62/* Array of the explicit column numbers of the tab stops; 63 after `tab_list' is exhausted, the rest of the line is printed 64 unchanged. The first column is column 0. */ 65static uintmax_t *tab_list; 66 67/* The number of allocated entries in `tab_list'. */ 68static size_t n_tabs_allocated; 69 70/* The index of the first invalid element of `tab_list', 71 where the next element can be added. */ 72static size_t first_free_tab; 73 74/* Null-terminated array of input filenames. */ 75static char **file_list; 76 77/* Default for `file_list' if no files are given on the command line. */ 78static char *stdin_argv[] = 79{ 80 (char *) "-", NULL 81}; 82 83/* True if we have ever read standard input. */ 84static bool have_read_stdin; 85 86/* The desired exit status. */ 87static int exit_status; 88 89/* For long options that have no equivalent short option, use a 90 non-character as a pseudo short option, starting with CHAR_MAX + 1. */ 91enum 92{ 93 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1 94}; 95 96static struct option const longopts[] = 97{ 98 {"tabs", required_argument, NULL, 't'}, 99 {"all", no_argument, NULL, 'a'}, 100 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION}, 101 {GETOPT_HELP_OPTION_DECL}, 102 {GETOPT_VERSION_OPTION_DECL}, 103 {NULL, 0, NULL, 0} 104}; 105 106void 107usage (int status) 108{ 109 if (status != EXIT_SUCCESS) 110 fprintf (stderr, _("Try `%s --help' for more information.\n"), 111 program_name); 112 else 113 { 114 printf (_("\ 115Usage: %s [OPTION]... [FILE]...\n\ 116"), 117 program_name); 118 fputs (_("\ 119Convert blanks in each FILE to tabs, writing to standard output.\n\ 120With no FILE, or when FILE is -, read standard input.\n\ 121\n\ 122"), stdout); 123 fputs (_("\ 124Mandatory arguments to long options are mandatory for short options too.\n\ 125"), stdout); 126 fputs (_("\ 127 -a, --all convert all blanks, instead of just initial blanks\n\ 128 --first-only convert only leading sequences of blanks (overrides -a)\n\ 129 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\ 130 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\ 131"), stdout); 132 fputs (HELP_OPTION_DESCRIPTION, stdout); 133 fputs (VERSION_OPTION_DESCRIPTION, stdout); 134 emit_ancillary_info (); 135 } 136 exit (status); 137} 138 139/* Add tab stop TABVAL to the end of `tab_list'. */ 140 141static void 142add_tab_stop (uintmax_t tabval) 143{ 144 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0; 145 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0; 146 147 if (first_free_tab == n_tabs_allocated) 148 tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); 149 tab_list[first_free_tab++] = tabval; 150 151 if (max_column_width < column_width) 152 { 153 if (SIZE_MAX < column_width) 154 error (EXIT_FAILURE, 0, _("tabs are too far apart")); 155 max_column_width = column_width; 156 } 157} 158 159/* Add the comma or blank separated list of tab stops STOPS 160 to the list of tab stops. */ 161 162static void 163parse_tab_stops (char const *stops) 164{ 165 bool have_tabval = false; 166 uintmax_t tabval IF_LINT (= 0); 167 char const *num_start IF_LINT (= NULL); 168 bool ok = true; 169 170 for (; *stops; stops++) 171 { 172 if (*stops == ',' || isblank (to_uchar (*stops))) 173 { 174 if (have_tabval) 175 add_tab_stop (tabval); 176 have_tabval = false; 177 } 178 else if (ISDIGIT (*stops)) 179 { 180 if (!have_tabval) 181 { 182 tabval = 0; 183 have_tabval = true; 184 num_start = stops; 185 } 186 187 /* Detect overflow. */ 188 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) 189 { 190 size_t len = strspn (num_start, "0123456789"); 191 char *bad_num = xstrndup (num_start, len); 192 error (0, 0, _("tab stop is too large %s"), quote (bad_num)); 193 free (bad_num); 194 ok = false; 195 stops = num_start + len - 1; 196 } 197 } 198 else 199 { 200 error (0, 0, _("tab size contains invalid character(s): %s"), 201 quote (stops)); 202 ok = false; 203 break; 204 } 205 } 206 207 if (!ok) 208 exit (EXIT_FAILURE); 209 210 if (have_tabval) 211 add_tab_stop (tabval); 212} 213 214/* Check that the list of tab stops TABS, with ENTRIES entries, 215 contains only nonzero, ascending values. */ 216 217static void 218validate_tab_stops (uintmax_t const *tabs, size_t entries) 219{ 220 uintmax_t prev_tab = 0; 221 size_t i; 222 223 for (i = 0; i < entries; i++) 224 { 225 if (tabs[i] == 0) 226 error (EXIT_FAILURE, 0, _("tab size cannot be 0")); 227 if (tabs[i] <= prev_tab) 228 error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); 229 prev_tab = tabs[i]; 230 } 231} 232 233/* Close the old stream pointer FP if it is non-NULL, 234 and return a new one opened to read the next input file. 235 Open a filename of `-' as the standard input. 236 Return NULL if there are no more input files. */ 237 238static FILE * 239next_file (FILE *fp) 240{ 241 static char *prev_file; 242 char *file; 243 244 if (fp) 245 { 246 if (ferror (fp)) 247 { 248 error (0, errno, "%s", prev_file); 249 exit_status = EXIT_FAILURE; 250 } 251 if (STREQ (prev_file, "-")) 252 clearerr (fp); /* Also clear EOF. */ 253 else if (fclose (fp) != 0) 254 { 255 error (0, errno, "%s", prev_file); 256 exit_status = EXIT_FAILURE; 257 } 258 } 259 260 while ((file = *file_list++) != NULL) 261 { 262 if (STREQ (file, "-")) 263 { 264 have_read_stdin = true; 265 prev_file = file; 266 return stdin; 267 } 268 fp = fopen (file, "r"); 269 if (fp) 270 { 271 prev_file = file; 272 return fp; 273 } 274 error (0, errno, "%s", file); 275 exit_status = EXIT_FAILURE; 276 } 277 return NULL; 278} 279 280/* Change blanks to tabs, writing to stdout. 281 Read each file in `file_list', in order. */ 282 283static void 284unexpand (void) 285{ 286 /* Input stream. */ 287 FILE *fp = next_file (NULL); 288 289 /* The array of pending blanks. In non-POSIX locales, blanks can 290 include characters other than spaces, so the blanks must be 291 stored, not merely counted. */ 292 char *pending_blank; 293 294 if (!fp) 295 return; 296 297 /* The worst case is a non-blank character, then one blank, then a 298 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so 299 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ 300 pending_blank = xmalloc (max_column_width); 301 302 for (;;) 303 { 304 /* Input character, or EOF. */ 305 int c; 306 307 /* If true, perform translations. */ 308 bool convert = true; 309 310 311 /* The following variables have valid values only when CONVERT 312 is true: */ 313 314 /* Column of next input character. */ 315 uintmax_t column = 0; 316 317 /* Column the next input tab stop is on. */ 318 uintmax_t next_tab_column = 0; 319 320 /* Index in TAB_LIST of next tab stop to examine. */ 321 size_t tab_index = 0; 322 323 /* If true, the first pending blank came just before a tab stop. */ 324 bool one_blank_before_tab_stop = false; 325 326 /* If true, the previous input character was a blank. This is 327 initially true, since initial strings of blanks are treated 328 as if the line was preceded by a blank. */ 329 bool prev_blank = true; 330 331 /* Number of pending columns of blanks. */ 332 size_t pending = 0; 333 334 335 /* Convert a line of text. */ 336 337 do 338 { 339 while ((c = getc (fp)) < 0 && (fp = next_file (fp))) 340 continue; 341 342 if (convert) 343 { 344 bool blank = !! isblank (c); 345 346 if (blank) 347 { 348 if (next_tab_column <= column) 349 { 350 if (tab_size) 351 next_tab_column = 352 column + (tab_size - column % tab_size); 353 else 354 for (;;) 355 if (tab_index == first_free_tab) 356 { 357 convert = false; 358 break; 359 } 360 else 361 { 362 uintmax_t tab = tab_list[tab_index++]; 363 if (column < tab) 364 { 365 next_tab_column = tab; 366 break; 367 } 368 } 369 } 370 371 if (convert) 372 { 373 if (next_tab_column < column) 374 error (EXIT_FAILURE, 0, _("input line is too long")); 375 376 if (c == '\t') 377 { 378 column = next_tab_column; 379 380 /* Discard pending blanks, unless it was a single 381 blank just before the previous tab stop. */ 382 if (! (pending == 1 && one_blank_before_tab_stop)) 383 { 384 pending = 0; 385 one_blank_before_tab_stop = false; 386 } 387 } 388 else 389 { 390 column++; 391 392 if (! (prev_blank && column == next_tab_column)) 393 { 394 /* It is not yet known whether the pending blanks 395 will be replaced by tabs. */ 396 if (column == next_tab_column) 397 one_blank_before_tab_stop = true; 398 pending_blank[pending++] = c; 399 prev_blank = true; 400 continue; 401 } 402 403 /* Replace the pending blanks by a tab or two. */ 404 pending_blank[0] = c = '\t'; 405 pending = one_blank_before_tab_stop; 406 } 407 } 408 } 409 else if (c == '\b') 410 { 411 /* Go back one column, and force recalculation of the 412 next tab stop. */ 413 column -= !!column; 414 next_tab_column = column; 415 tab_index -= !!tab_index; 416 } 417 else 418 { 419 column++; 420 if (!column) 421 error (EXIT_FAILURE, 0, _("input line is too long")); 422 } 423 424 if (pending) 425 { 426 if (fwrite (pending_blank, 1, pending, stdout) != pending) 427 error (EXIT_FAILURE, errno, _("write error")); 428 pending = 0; 429 one_blank_before_tab_stop = false; 430 } 431 432 prev_blank = blank; 433 convert &= convert_entire_line || blank; 434 } 435 436 if (c < 0) 437 { 438 free (pending_blank); 439 return; 440 } 441 442 if (putchar (c) < 0) 443 error (EXIT_FAILURE, errno, _("write error")); 444 } 445 while (c != '\n'); 446 } 447} 448 449int 450main (int argc, char **argv) 451{ 452 bool have_tabval = false; 453 uintmax_t tabval IF_LINT (= 0); 454 int c; 455 456 /* If true, cancel the effect of any -a (explicit or implicit in -t), 457 so that only leading blanks will be considered. */ 458 bool convert_first_only = false; 459 460 initialize_main (&argc, &argv); 461 set_program_name (argv[0]); 462 setlocale (LC_ALL, ""); 463 bindtextdomain (PACKAGE, LOCALEDIR); 464 textdomain (PACKAGE); 465 466 atexit (close_stdout); 467 468 have_read_stdin = false; 469 exit_status = EXIT_SUCCESS; 470 convert_entire_line = false; 471 tab_list = NULL; 472 first_free_tab = 0; 473 474 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL)) 475 != -1) 476 { 477 switch (c) 478 { 479 case '?': 480 usage (EXIT_FAILURE); 481 case 'a': 482 convert_entire_line = true; 483 break; 484 case 't': 485 convert_entire_line = true; 486 parse_tab_stops (optarg); 487 break; 488 case CONVERT_FIRST_ONLY_OPTION: 489 convert_first_only = true; 490 break; 491 case ',': 492 if (have_tabval) 493 add_tab_stop (tabval); 494 have_tabval = false; 495 break; 496 case_GETOPT_HELP_CHAR; 497 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 498 default: 499 if (!have_tabval) 500 { 501 tabval = 0; 502 have_tabval = true; 503 } 504 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t)) 505 error (EXIT_FAILURE, 0, _("tab stop value is too large")); 506 break; 507 } 508 } 509 510 if (convert_first_only) 511 convert_entire_line = false; 512 513 if (have_tabval) 514 add_tab_stop (tabval); 515 516 validate_tab_stops (tab_list, first_free_tab); 517 518 if (first_free_tab == 0) 519 tab_size = max_column_width = 8; 520 else if (first_free_tab == 1) 521 tab_size = tab_list[0]; 522 else 523 tab_size = 0; 524 525 file_list = (optind < argc ? &argv[optind] : stdin_argv); 526 527 unexpand (); 528 529 if (have_read_stdin && fclose (stdin) != 0) 530 error (EXIT_FAILURE, errno, "-"); 531 532 exit (exit_status); 533} 534