1/* $NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $ */ 2 3/*- 4 * SPDX-License-Identifier: BSD-3-Clause 5 * 6 * Copyright (c) 1990, 1993, 1994 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Michael Rendell of the Memorial University of Newfoundland. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38#ifndef lint 39__COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\ 40 The Regents of the University of California. All rights reserved."); 41#endif /* not lint */ 42 43#ifndef lint 44#if 0 45static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 46__FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $") 47; 48 49#endif 50__RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $"); 51#endif /* not lint */ 52 53#include <err.h> 54#include <errno.h> 55#include <inttypes.h> 56#include <limits.h> 57#include <locale.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <termios.h> 62#include <unistd.h> 63#include <wchar.h> 64#include <wctype.h> 65 66#define BS '\b' /* backspace */ 67#define TAB '\t' /* tab */ 68#define SPACE ' ' /* space */ 69#define NL '\n' /* newline */ 70#define CR '\r' /* carriage return */ 71#define ESC '\033' /* escape */ 72#define SI '\017' /* shift in to normal character set */ 73#define SO '\016' /* shift out to alternate character set */ 74#define VT '\013' /* vertical tab (aka reverse line feed) */ 75#define RLF '7' /* ESC-7 reverse line feed */ 76#define RHLF '8' /* ESC-8 reverse half-line feed */ 77#define FHLF '9' /* ESC-9 forward half-line feed */ 78 79/* build up at least this many lines before flushing them out */ 80#define BUFFER_MARGIN 32 81 82typedef char CSET; 83 84typedef struct char_str { 85#define CS_NORMAL 1 86#define CS_ALTERNATE 2 87 int c_column; /* column character is in */ 88 CSET c_set; /* character set (currently only 2) */ 89 wchar_t c_char; /* character in question */ 90 int c_width; /* character width */ 91} CHAR; 92 93typedef struct line_str LINE; 94struct line_str { 95 CHAR *l_line; /* characters on the line */ 96 LINE *l_prev; /* previous line */ 97 LINE *l_next; /* next line */ 98 int l_lsize; /* allocated sizeof l_line */ 99 int l_line_len; /* strlen(l_line) */ 100 int l_needs_sort; /* set if chars went in out of order */ 101 int l_max_col; /* max column in the line */ 102}; 103 104static void addto_lineno(int *, int); 105static LINE *alloc_line(void); 106static void dowarn(int); 107static void flush_line(LINE *); 108static void flush_lines(int); 109static void flush_blanks(void); 110static void free_line(LINE *); 111__dead static void usage(void); 112 113static CSET last_set; /* char_set of last char printed */ 114static LINE *lines; 115static int compress_spaces; /* if doing space -> tab conversion */ 116static int fine; /* if `fine' resolution (half lines) */ 117static int max_bufd_lines; /* max # of half lines to keep in memory */ 118static int nblank_lines; /* # blanks after last flushed line */ 119static int no_backspaces; /* if not to output any backspaces */ 120static int pass_unknown_seqs; /* pass unknown control sequences */ 121 122#define PUTC(ch) \ 123 do { \ 124 if (putwchar(ch) == WEOF) \ 125 errx(EXIT_FAILURE, "write error"); \ 126 } while (0) 127 128int 129main(int argc, char **argv) 130{ 131 wint_t ch; 132 CHAR *c; 133 CSET cur_set; /* current character set */ 134 LINE *l; /* current line */ 135 int extra_lines; /* # of lines above first line */ 136 int cur_col; /* current column */ 137 int cur_line; /* line number of current position */ 138 int max_line; /* max value of cur_line */ 139 int this_line; /* line l points to */ 140 int nflushd_lines; /* number of lines that were flushed */ 141 int adjust, opt, warned, width; 142 int e; 143 144 (void)setlocale(LC_CTYPE, ""); 145 146 max_bufd_lines = 256; 147 compress_spaces = 1; /* compress spaces into tabs */ 148 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 149 switch (opt) { 150 case 'b': /* do not output backspaces */ 151 no_backspaces = 1; 152 break; 153 case 'f': /* allow half forward line feeds */ 154 fine = 1; 155 break; 156 case 'h': /* compress spaces into tabs */ 157 compress_spaces = 1; 158 break; 159 case 'l': /* buffered line count */ 160 max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1, 161 (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2; 162 if (e) 163 errc(EXIT_FAILURE, e, "bad -l argument `%s'", 164 optarg); 165 break; 166 case 'p': /* pass unknown control sequences */ 167 pass_unknown_seqs = 1; 168 break; 169 case 'x': /* do not compress spaces into tabs */ 170 compress_spaces = 0; 171 break; 172 case '?': 173 default: 174 usage(); 175 } 176 177 if (optind != argc) 178 usage(); 179 180 adjust = cur_col = extra_lines = warned = 0; 181 cur_line = max_line = nflushd_lines = this_line = 0; 182 cur_set = last_set = CS_NORMAL; 183 lines = l = alloc_line(); 184 185 while ((ch = getwchar()) != WEOF) { 186 if (!iswgraph(ch)) { 187 switch (ch) { 188 case BS: /* can't go back further */ 189 if (cur_col == 0) 190 continue; 191 --cur_col; 192 continue; 193 case CR: 194 cur_col = 0; 195 continue; 196 case ESC: /* just ignore EOF */ 197 switch(getwchar()) { 198 /* 199 * In the input stream, accept both the 200 * XPG5 sequences ESC-digit and the 201 * traditional BSD sequences ESC-ctrl. 202 */ 203 case '\007': 204 /* FALLTHROUGH */ 205 case RLF: 206 addto_lineno(&cur_line, -2); 207 break; 208 case '\010': 209 /* FALLTHROUGH */ 210 case RHLF: 211 addto_lineno(&cur_line, -1); 212 break; 213 case '\011': 214 /* FALLTHROUGH */ 215 case FHLF: 216 addto_lineno(&cur_line, 1); 217 if (cur_line > max_line) 218 max_line = cur_line; 219 } 220 continue; 221 case NL: 222 addto_lineno(&cur_line, 2); 223 if (cur_line > max_line) 224 max_line = cur_line; 225 cur_col = 0; 226 continue; 227 case SPACE: 228 ++cur_col; 229 continue; 230 case SI: 231 cur_set = CS_NORMAL; 232 continue; 233 case SO: 234 cur_set = CS_ALTERNATE; 235 continue; 236 case TAB: /* adjust column */ 237 cur_col |= 7; 238 ++cur_col; 239 continue; 240 case VT: 241 addto_lineno(&cur_line, -2); 242 continue; 243 } 244 if (iswspace(ch)) { 245 if ((width = wcwidth(ch)) > 0) 246 cur_col += width; 247 continue; 248 } 249 if (!pass_unknown_seqs) 250 continue; 251 } 252 253 /* Must stuff ch in a line - are we at the right one? */ 254 if (cur_line + adjust != this_line) { 255 LINE *lnew; 256 257 /* round up to next line */ 258 adjust = !fine && (cur_line & 1); 259 260 if (cur_line + adjust < this_line) { 261 while (cur_line + adjust < this_line && 262 l->l_prev != NULL) { 263 l = l->l_prev; 264 this_line--; 265 } 266 if (cur_line + adjust < this_line) { 267 if (nflushd_lines == 0) { 268 /* 269 * Allow backup past first 270 * line if nothing has been 271 * flushed yet. 272 */ 273 while (cur_line + adjust 274 < this_line) { 275 lnew = alloc_line(); 276 l->l_prev = lnew; 277 lnew->l_next = l; 278 l = lines = lnew; 279 extra_lines++; 280 this_line--; 281 } 282 } else { 283 if (!warned++) 284 dowarn(cur_line); 285 cur_line = this_line - adjust; 286 } 287 } 288 } else { 289 /* may need to allocate here */ 290 while (cur_line + adjust > this_line) { 291 if (l->l_next == NULL) { 292 l->l_next = alloc_line(); 293 l->l_next->l_prev = l; 294 } 295 l = l->l_next; 296 this_line++; 297 } 298 } 299 if (this_line > nflushd_lines && 300 this_line - nflushd_lines >= 301 max_bufd_lines + BUFFER_MARGIN) { 302 if (extra_lines) { 303 flush_lines(extra_lines); 304 extra_lines = 0; 305 } 306 flush_lines(this_line - nflushd_lines - 307 max_bufd_lines); 308 nflushd_lines = this_line - max_bufd_lines; 309 } 310 } 311 /* grow line's buffer? */ 312 if (l->l_line_len + 1 >= l->l_lsize) { 313 int need; 314 315 need = l->l_lsize ? l->l_lsize * 2 : 90; 316 if ((l->l_line = realloc(l->l_line, 317 (unsigned)need * sizeof(CHAR))) == NULL) 318 err(EXIT_FAILURE, NULL); 319 l->l_lsize = need; 320 } 321 c = &l->l_line[l->l_line_len++]; 322 c->c_char = ch; 323 c->c_set = cur_set; 324 c->c_column = cur_col; 325 c->c_width = wcwidth(ch); 326 /* 327 * If things are put in out of order, they will need sorting 328 * when it is flushed. 329 */ 330 if (cur_col < l->l_max_col) 331 l->l_needs_sort = 1; 332 else 333 l->l_max_col = cur_col; 334 if (c->c_width > 0) 335 cur_col += c->c_width; 336 } 337 if (ferror(stdin)) 338 err(EXIT_FAILURE, NULL); 339 if (extra_lines) { 340 /* 341 * Extra lines only exist if no lines have been flushed 342 * yet. This means that 'lines' must point to line zero 343 * after we flush the extra lines. 344 */ 345 flush_lines(extra_lines); 346 l = lines; 347 this_line = 0; 348 } 349 350 /* goto the last line that had a character on it */ 351 for (; l->l_next; l = l->l_next) 352 this_line++; 353 flush_lines(this_line - nflushd_lines + 1); 354 355 /* make sure we leave things in a sane state */ 356 if (last_set != CS_NORMAL) 357 PUTC(SI); 358 359 /* flush out the last few blank lines */ 360 if (max_line >= this_line) 361 nblank_lines = max_line - this_line + (max_line & 1); 362 if (nblank_lines == 0) 363 /* end with a newline even if the source doesn't */ 364 nblank_lines = 2; 365 flush_blanks(); 366 exit(EXIT_SUCCESS); 367} 368 369/* 370 * Prints the first 'nflush' lines. Printed lines are freed. 371 * After this function returns, 'lines' points to the first 372 * of the remaining lines, and 'nblank_lines' will have the 373 * number of half line feeds between the final flushed line 374 * and the first remaining line. 375 */ 376static void 377flush_lines(int nflush) 378{ 379 LINE *l; 380 381 while (--nflush >= 0) { 382 l = lines; 383 lines = l->l_next; 384 if (l->l_line) { 385 flush_blanks(); 386 flush_line(l); 387 free(l->l_line); 388 } 389 if (l->l_next) 390 nblank_lines++; 391 free_line(l); 392 } 393 if (lines) 394 lines->l_prev = NULL; 395} 396 397/* 398 * Print a number of newline/half newlines. 399 * nblank_lines is the number of half line feeds. 400 */ 401static void 402flush_blanks(void) 403{ 404 int half, i, nb; 405 406 half = 0; 407 nb = nblank_lines; 408 if (nb & 1) { 409 if (fine) 410 half = 1; 411 else 412 nb++; 413 } 414 nb /= 2; 415 for (i = nb; --i >= 0;) 416 PUTC('\n'); 417 if (half) { 418 PUTC(ESC); 419 PUTC(FHLF); 420 if (!nb) 421 PUTC('\r'); 422 } 423 nblank_lines = 0; 424} 425 426/* 427 * Write a line to stdout taking care of space to tab conversion (-h flag) 428 * and character set shifts. 429 */ 430static void 431flush_line(LINE *l) 432{ 433 CHAR *c, *endc; 434 int i, j, nchars, last_col, save, this_col, tot; 435 436 last_col = 0; 437 nchars = l->l_line_len; 438 439 if (l->l_needs_sort) { 440 static CHAR *sorted; 441 static int count_size, *count, sorted_size; 442 443 /* 444 * Do an O(n) sort on l->l_line by column being careful to 445 * preserve the order of characters in the same column. 446 */ 447 if (l->l_lsize > sorted_size) { 448 sorted_size = l->l_lsize; 449 if ((sorted = realloc(sorted, 450 sizeof(CHAR) * (size_t)sorted_size)) == NULL) 451 err(EXIT_FAILURE, NULL); 452 } 453 if (l->l_max_col >= count_size) { 454 count_size = l->l_max_col + 1; 455 if ((count = realloc(count, 456 sizeof(int) * (size_t)count_size)) == NULL) 457 err(EXIT_FAILURE, NULL); 458 } 459 memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1); 460 for (i = nchars, c = l->l_line; --i >= 0; c++) 461 count[c->c_column]++; 462 463 /* 464 * calculate running total (shifted down by 1) to use as 465 * indices into new line. 466 */ 467 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 468 save = count[i]; 469 count[i] = tot; 470 tot += save; 471 } 472 473 for (i = nchars, c = l->l_line; --i >= 0; c++) 474 sorted[count[c->c_column]++] = *c; 475 c = sorted; 476 } else 477 c = l->l_line; 478 while (nchars > 0) { 479 this_col = c->c_column; 480 endc = c; 481 do { 482 ++endc; 483 } while (--nchars > 0 && this_col == endc->c_column); 484 485 /* if -b only print last character */ 486 if (no_backspaces) { 487 c = endc - 1; 488 if (nchars > 0 && 489 this_col + c->c_width > endc->c_column) 490 continue; 491 } 492 493 if (this_col > last_col) { 494 int nspace = this_col - last_col; 495 496 if (compress_spaces && nspace > 1) { 497 while (1) { 498 int tab_col, tab_size; 499 500 tab_col = (last_col + 8) & ~7; 501 if (tab_col > this_col) 502 break; 503 tab_size = tab_col - last_col; 504 if (tab_size == 1) 505 PUTC(' '); 506 else 507 PUTC('\t'); 508 nspace -= tab_size; 509 last_col = tab_col; 510 } 511 } 512 while (--nspace >= 0) 513 PUTC(' '); 514 last_col = this_col; 515 } 516 517 for (;;) { 518 if (c->c_set != last_set) { 519 switch (c->c_set) { 520 case CS_NORMAL: 521 PUTC(SI); 522 break; 523 case CS_ALTERNATE: 524 PUTC(SO); 525 } 526 last_set = c->c_set; 527 } 528 PUTC(c->c_char); 529 if ((c + 1) < endc) 530 for (j = 0; j < c->c_width; j++) 531 PUTC('\b'); 532 if (++c >= endc) 533 break; 534 } 535 last_col += (c - 1)->c_width; 536 } 537} 538 539/* 540 * Increment or decrement a line number, checking for overflow. 541 * Stop one below INT_MAX such that the adjust variable is safe. 542 */ 543void 544addto_lineno(int *lno, int offset) 545{ 546 if (offset > 0) { 547 if (*lno >= INT_MAX - offset) 548 errx(EXIT_FAILURE, "too many lines"); 549 } else { 550 if (*lno < INT_MIN - offset) 551 errx(EXIT_FAILURE, "too many reverse line feeds"); 552 } 553 *lno += offset; 554} 555 556#define NALLOC 64 557 558static LINE *line_freelist; 559 560static LINE * 561alloc_line(void) 562{ 563 LINE *l; 564 int i; 565 566 if (!line_freelist) { 567 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 568 err(EXIT_FAILURE, NULL); 569 line_freelist = l; 570 for (i = 1; i < NALLOC; i++, l++) 571 l->l_next = l + 1; 572 l->l_next = NULL; 573 } 574 l = line_freelist; 575 line_freelist = l->l_next; 576 577 memset(l, 0, sizeof(LINE)); 578 return (l); 579} 580 581static void 582free_line(LINE *l) 583{ 584 585 l->l_next = line_freelist; 586 line_freelist = l; 587} 588 589static void 590usage(void) 591{ 592 593 (void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname()); 594 exit(EXIT_FAILURE); 595} 596 597static void 598dowarn(int line) 599{ 600 601 warnx("warning: can't back up %s", 602 line < 0 ? "past first line" : "-- line already flushed"); 603} 604