col.c revision 132824
1/*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1990, 1993, 1994\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41#endif 42 43#if 0 44#ifndef lint 45static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 46#endif 47#endif 48 49#include <sys/cdefs.h> 50__FBSDID("$FreeBSD: head/usr.bin/col/col.c 132824 2004-07-29 07:23:37Z tjr $"); 51 52#include <err.h> 53#include <locale.h> 54#include <string.h> 55#include <stdio.h> 56#include <stdlib.h> 57#include <unistd.h> 58#include <locale.h> 59#include <wchar.h> 60#include <wctype.h> 61 62#define BS '\b' /* backspace */ 63#define TAB '\t' /* tab */ 64#define SPACE ' ' /* space */ 65#define NL '\n' /* newline */ 66#define CR '\r' /* carriage return */ 67#define ESC '\033' /* escape */ 68#define SI '\017' /* shift in to normal character set */ 69#define SO '\016' /* shift out to alternate character set */ 70#define VT '\013' /* vertical tab (aka reverse line feed) */ 71#define RLF '\007' /* ESC-07 reverse line feed */ 72#define RHLF '\010' /* ESC-010 reverse half-line feed */ 73#define FHLF '\011' /* ESC-011 forward half-line feed */ 74 75/* build up at least this many lines before flushing them out */ 76#define BUFFER_MARGIN 32 77 78typedef char CSET; 79 80typedef struct char_str { 81#define CS_NORMAL 1 82#define CS_ALTERNATE 2 83 short c_column; /* column character is in */ 84 CSET c_set; /* character set (currently only 2) */ 85 wchar_t c_char; /* character in question */ 86 int c_width; /* character width */ 87} CHAR; 88 89typedef struct line_str LINE; 90struct line_str { 91 CHAR *l_line; /* characters on the line */ 92 LINE *l_prev; /* previous line */ 93 LINE *l_next; /* next line */ 94 int l_lsize; /* allocated sizeof l_line */ 95 int l_line_len; /* strlen(l_line) */ 96 int l_needs_sort; /* set if chars went in out of order */ 97 int l_max_col; /* max column in the line */ 98}; 99 100LINE *alloc_line(void); 101void dowarn(int); 102void flush_line(LINE *); 103void flush_lines(int); 104void flush_blanks(void); 105void free_line(LINE *); 106void usage(void); 107 108CSET last_set; /* char_set of last char printed */ 109LINE *lines; 110int compress_spaces; /* if doing space -> tab conversion */ 111int fine; /* if `fine' resolution (half lines) */ 112int max_bufd_lines; /* max # lines to keep in memory */ 113int nblank_lines; /* # blanks after last flushed line */ 114int no_backspaces; /* if not to output any backspaces */ 115int pass_unknown_seqs; /* pass unknown control sequences */ 116 117#define PUTC(ch) \ 118 do { \ 119 if (putwchar(ch) == WEOF) \ 120 errx(1, "write error"); \ 121 } while (0) 122 123int 124main(int argc, char **argv) 125{ 126 wint_t ch; 127 CHAR *c; 128 CSET cur_set; /* current character set */ 129 LINE *l; /* current line */ 130 int extra_lines; /* # of lines above first line */ 131 int cur_col; /* current column */ 132 int cur_line; /* line number of current position */ 133 int max_line; /* max value of cur_line */ 134 int this_line; /* line l points to */ 135 int nflushd_lines; /* number of lines that were flushed */ 136 int adjust, opt, warned, width; 137 138 (void)setlocale(LC_CTYPE, ""); 139 140 max_bufd_lines = 128; 141 compress_spaces = 1; /* compress spaces into tabs */ 142 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 143 switch (opt) { 144 case 'b': /* do not output backspaces */ 145 no_backspaces = 1; 146 break; 147 case 'f': /* allow half forward line feeds */ 148 fine = 1; 149 break; 150 case 'h': /* compress spaces into tabs */ 151 compress_spaces = 1; 152 break; 153 case 'l': /* buffered line count */ 154 if ((max_bufd_lines = atoi(optarg)) <= 0) 155 errx(1, "bad -l argument %s", optarg); 156 break; 157 case 'p': /* pass unknown control sequences */ 158 pass_unknown_seqs = 1; 159 break; 160 case 'x': /* do not compress spaces into tabs */ 161 compress_spaces = 0; 162 break; 163 case '?': 164 default: 165 usage(); 166 } 167 168 if (optind != argc) 169 usage(); 170 171 /* this value is in half lines */ 172 max_bufd_lines *= 2; 173 174 adjust = cur_col = extra_lines = warned = 0; 175 cur_line = max_line = nflushd_lines = this_line = 0; 176 cur_set = last_set = CS_NORMAL; 177 lines = l = alloc_line(); 178 179 while ((ch = getwchar()) != WEOF) { 180 if (!iswgraph(ch)) { 181 switch (ch) { 182 case BS: /* can't go back further */ 183 if (cur_col == 0) 184 continue; 185 --cur_col; 186 continue; 187 case CR: 188 cur_col = 0; 189 continue; 190 case ESC: /* just ignore EOF */ 191 switch(getwchar()) { 192 case RLF: 193 cur_line -= 2; 194 break; 195 case RHLF: 196 cur_line--; 197 break; 198 case FHLF: 199 cur_line++; 200 if (cur_line > max_line) 201 max_line = cur_line; 202 } 203 continue; 204 case NL: 205 cur_line += 2; 206 if (cur_line > max_line) 207 max_line = cur_line; 208 cur_col = 0; 209 continue; 210 case SPACE: 211 ++cur_col; 212 continue; 213 case SI: 214 cur_set = CS_NORMAL; 215 continue; 216 case SO: 217 cur_set = CS_ALTERNATE; 218 continue; 219 case TAB: /* adjust column */ 220 cur_col |= 7; 221 ++cur_col; 222 continue; 223 case VT: 224 cur_line -= 2; 225 continue; 226 } 227 if (iswspace(ch)) { 228 if ((width = wcwidth(ch)) > 0) 229 cur_col += width; 230 continue; 231 } 232 if (!pass_unknown_seqs) 233 continue; 234 } 235 236 /* Must stuff ch in a line - are we at the right one? */ 237 if (cur_line != this_line - adjust) { 238 LINE *lnew; 239 int nmove; 240 241 adjust = 0; 242 nmove = cur_line - this_line; 243 if (!fine) { 244 /* round up to next line */ 245 if (cur_line & 1) { 246 adjust = 1; 247 nmove++; 248 } 249 } 250 if (nmove < 0) { 251 for (; nmove < 0 && l->l_prev; nmove++) 252 l = l->l_prev; 253 if (nmove) { 254 if (nflushd_lines == 0) { 255 /* 256 * Allow backup past first 257 * line if nothing has been 258 * flushed yet. 259 */ 260 for (; nmove < 0; nmove++) { 261 lnew = alloc_line(); 262 l->l_prev = lnew; 263 lnew->l_next = l; 264 l = lines = lnew; 265 extra_lines++; 266 } 267 } else { 268 if (!warned++) 269 dowarn(cur_line); 270 cur_line -= nmove; 271 } 272 } 273 } else { 274 /* may need to allocate here */ 275 for (; nmove > 0 && l->l_next; nmove--) 276 l = l->l_next; 277 for (; nmove > 0; nmove--) { 278 lnew = alloc_line(); 279 lnew->l_prev = l; 280 l->l_next = lnew; 281 l = lnew; 282 } 283 } 284 this_line = cur_line + adjust; 285 nmove = this_line - nflushd_lines; 286 if (nmove >= max_bufd_lines + BUFFER_MARGIN) { 287 nflushd_lines += nmove - max_bufd_lines; 288 flush_lines(nmove - max_bufd_lines); 289 } 290 } 291 /* grow line's buffer? */ 292 if (l->l_line_len + 1 >= l->l_lsize) { 293 int need; 294 295 need = l->l_lsize ? l->l_lsize * 2 : 90; 296 if ((l->l_line = realloc(l->l_line, 297 (unsigned)need * sizeof(CHAR))) == NULL) 298 err(1, (char *)NULL); 299 l->l_lsize = need; 300 } 301 c = &l->l_line[l->l_line_len++]; 302 c->c_char = ch; 303 c->c_set = cur_set; 304 c->c_column = cur_col; 305 c->c_width = wcwidth(ch); 306 /* 307 * If things are put in out of order, they will need sorting 308 * when it is flushed. 309 */ 310 if (cur_col < l->l_max_col) 311 l->l_needs_sort = 1; 312 else 313 l->l_max_col = cur_col; 314 if (c->c_width > 0) 315 cur_col += c->c_width; 316 } 317 if (ferror(stdin)) 318 err(1, NULL); 319 if (max_line == 0) 320 exit(0); /* no lines, so just exit */ 321 322 /* goto the last line that had a character on it */ 323 for (; l->l_next; l = l->l_next) 324 this_line++; 325 flush_lines(this_line - nflushd_lines + extra_lines + 1); 326 327 /* make sure we leave things in a sane state */ 328 if (last_set != CS_NORMAL) 329 PUTC('\017'); 330 331 /* flush out the last few blank lines */ 332 nblank_lines = max_line - this_line; 333 if (max_line & 1) 334 nblank_lines++; 335 else if (!nblank_lines) 336 /* missing a \n on the last line? */ 337 nblank_lines = 2; 338 flush_blanks(); 339 exit(0); 340} 341 342void 343flush_lines(int nflush) 344{ 345 LINE *l; 346 347 while (--nflush >= 0) { 348 l = lines; 349 lines = l->l_next; 350 if (l->l_line) { 351 flush_blanks(); 352 flush_line(l); 353 } 354 nblank_lines++; 355 if (l->l_line) 356 (void)free(l->l_line); 357 free_line(l); 358 } 359 if (lines) 360 lines->l_prev = NULL; 361} 362 363/* 364 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 365 * is the number of half line feeds, otherwise it is the number of whole line 366 * feeds. 367 */ 368void 369flush_blanks(void) 370{ 371 int half, i, nb; 372 373 half = 0; 374 nb = nblank_lines; 375 if (nb & 1) { 376 if (fine) 377 half = 1; 378 else 379 nb++; 380 } 381 nb /= 2; 382 for (i = nb; --i >= 0;) 383 PUTC('\n'); 384 if (half) { 385 PUTC('\033'); 386 PUTC('9'); 387 if (!nb) 388 PUTC('\r'); 389 } 390 nblank_lines = 0; 391} 392 393/* 394 * Write a line to stdout taking care of space to tab conversion (-h flag) 395 * and character set shifts. 396 */ 397void 398flush_line(LINE *l) 399{ 400 CHAR *c, *endc; 401 int i, nchars, last_col, this_col; 402 403 last_col = 0; 404 nchars = l->l_line_len; 405 406 if (l->l_needs_sort) { 407 static CHAR *sorted; 408 static int count_size, *count, i, save, sorted_size, tot; 409 410 /* 411 * Do an O(n) sort on l->l_line by column being careful to 412 * preserve the order of characters in the same column. 413 */ 414 if (l->l_lsize > sorted_size) { 415 sorted_size = l->l_lsize; 416 if ((sorted = realloc(sorted, 417 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 418 err(1, (char *)NULL); 419 } 420 if (l->l_max_col >= count_size) { 421 count_size = l->l_max_col + 1; 422 if ((count = realloc(count, 423 (unsigned)sizeof(int) * count_size)) == NULL) 424 err(1, (char *)NULL); 425 } 426 memset(count, 0, sizeof(int) * l->l_max_col + 1); 427 for (i = nchars, c = l->l_line; --i >= 0; c++) 428 count[c->c_column]++; 429 430 /* 431 * calculate running total (shifted down by 1) to use as 432 * indices into new line. 433 */ 434 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 435 save = count[i]; 436 count[i] = tot; 437 tot += save; 438 } 439 440 for (i = nchars, c = l->l_line; --i >= 0; c++) 441 sorted[count[c->c_column]++] = *c; 442 c = sorted; 443 } else 444 c = l->l_line; 445 while (nchars > 0) { 446 this_col = c->c_column; 447 endc = c; 448 do { 449 ++endc; 450 } while (--nchars > 0 && this_col == endc->c_column); 451 452 /* if -b only print last character */ 453 if (no_backspaces) { 454 c = endc - 1; 455 if (nchars > 0 && 456 this_col + c->c_width > endc->c_column) 457 continue; 458 } 459 460 if (this_col > last_col) { 461 int nspace = this_col - last_col; 462 463 if (compress_spaces && nspace > 1) { 464 while (1) { 465 int tab_col, tab_size;; 466 467 tab_col = (last_col + 8) & ~7; 468 if (tab_col > this_col) 469 break; 470 tab_size = tab_col - last_col; 471 if (tab_size == 1) 472 PUTC(' '); 473 else 474 PUTC('\t'); 475 nspace -= tab_size; 476 last_col = tab_col; 477 } 478 } 479 while (--nspace >= 0) 480 PUTC(' '); 481 last_col = this_col; 482 } 483 484 for (;;) { 485 if (c->c_set != last_set) { 486 switch (c->c_set) { 487 case CS_NORMAL: 488 PUTC('\017'); 489 break; 490 case CS_ALTERNATE: 491 PUTC('\016'); 492 } 493 last_set = c->c_set; 494 } 495 PUTC(c->c_char); 496 if ((c + 1) < endc) 497 for (i = 0; i < c->c_width; i++) 498 PUTC('\b'); 499 if (++c >= endc) 500 break; 501 } 502 last_col += (c - 1)->c_width; 503 } 504} 505 506#define NALLOC 64 507 508static LINE *line_freelist; 509 510LINE * 511alloc_line(void) 512{ 513 LINE *l; 514 int i; 515 516 if (!line_freelist) { 517 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 518 err(1, (char *)NULL); 519 line_freelist = l; 520 for (i = 1; i < NALLOC; i++, l++) 521 l->l_next = l + 1; 522 l->l_next = NULL; 523 } 524 l = line_freelist; 525 line_freelist = l->l_next; 526 527 memset(l, 0, sizeof(LINE)); 528 return (l); 529} 530 531void 532free_line(LINE *l) 533{ 534 535 l->l_next = line_freelist; 536 line_freelist = l; 537} 538 539void 540usage(void) 541{ 542 543 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 544 exit(1); 545} 546 547void 548dowarn(int line) 549{ 550 551 warnx("warning: can't back up %s", 552 line < 0 ? "past first line" : "-- line already flushed"); 553} 554