1/* $NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $ */ 2 3/* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#ifndef lint 34__COPYRIGHT("@(#) Copyright (c) 1980, 1993\ 35 The Regents of the University of California. All rights reserved."); 36#endif /* not lint */ 37 38#ifndef lint 39#if 0 40static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93"; 41#endif 42__RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $"); 43#endif /* not lint */ 44 45#include <wctype.h> 46#include <locale.h> 47#include <stdio.h> 48#include <stdlib.h> 49#include <unistd.h> 50#include <errno.h> 51#include <err.h> 52#include <limits.h> 53#include <string.h> 54#include <locale.h> 55#include "buffer.h" 56 57/* 58 * fmt -- format the concatenation of input files or standard input 59 * onto standard output. Designed for use with Mail ~| 60 * 61 * Syntax : fmt [ goal [ max ] ] [ name ... ] 62 * Authors: Kurt Shoens (UCB) 12/7/78; 63 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept]. 64 */ 65 66/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */ 67#define GOAL_LENGTH 65 68#define MAX_LENGTH 75 69static size_t goal_length; /* Target or goal line length in output */ 70static size_t max_length; /* Max line length in output */ 71static size_t pfx; /* Current leading blank count */ 72static int raw; /* Don't treat mail specially */ 73static int lineno; /* Current input line */ 74static int mark; /* Last place we saw a head line */ 75static int center; 76static struct buffer outbuf; 77 78static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL }; 79 80static void usage(void) __dead; 81static int getnum(const char *, const char *, size_t *, int); 82static void fmt(FILE *); 83static int ispref(const wchar_t *, const wchar_t *); 84static void leadin(void); 85static void oflush(void); 86static void pack(const wchar_t *, size_t); 87static void prefix(const struct buffer *, int); 88static void split(const wchar_t *, int); 89static void tabulate(struct buffer *); 90 91 92int ishead(const wchar_t *); 93 94/* 95 * Drive the whole formatter by managing input files. Also, 96 * cause initialization of the output stuff and flush it out 97 * at the end. 98 */ 99 100int 101main(int argc, char **argv) 102{ 103 FILE *fi; 104 int errs = 0; 105 int compat = 1; 106 int c; 107 108 goal_length = GOAL_LENGTH; 109 max_length = MAX_LENGTH; 110 buf_init(&outbuf); 111 lineno = 1; 112 mark = -10; 113 114 setprogname(*argv); 115 (void)setlocale(LC_ALL, ""); 116 117 while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1) 118 switch (c) { 119 case 'C': 120 center++; 121 break; 122 case 'g': 123 (void)getnum(optarg, "goal", &goal_length, 1); 124 compat = 0; 125 break; 126 case 'm': 127 case 'w': 128 (void)getnum(optarg, "max", &max_length, 1); 129 compat = 0; 130 break; 131 case 'r': 132 raw++; 133 break; 134 default: 135 usage(); 136 } 137 138 argc -= optind; 139 argv += optind; 140 141 /* 142 * compatibility with old usage. 143 */ 144 if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) { 145 argv++; 146 argc--; 147 if (argc > 0 && getnum(*argv, "max", &max_length, 0)) { 148 argv++; 149 argc--; 150 } 151 } 152 153 if (max_length <= goal_length) { 154 errx(1, "Max length (%zu) must be greater than goal " 155 "length (%zu)", max_length, goal_length); 156 } 157 if (argc == 0) { 158 fmt(stdin); 159 oflush(); 160 return 0; 161 } 162 for (;argc; argc--, argv++) { 163 if ((fi = fopen(*argv, "r")) == NULL) { 164 warn("Cannot open `%s'", *argv); 165 errs++; 166 continue; 167 } 168 fmt(fi); 169 (void)fclose(fi); 170 } 171 oflush(); 172 buf_end(&outbuf); 173 return errs; 174} 175 176static void 177usage(void) 178{ 179 (void)fprintf(stderr, 180 "Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n" 181 "\t %s [-Cr] [<goal>] [<max>] [<files>]\n", 182 getprogname(), getprogname()); 183 exit(1); 184} 185 186static int 187getnum(const char *str, const char *what, size_t *res, int badnum) 188{ 189 unsigned long ul; 190 char *ep; 191 192 errno = 0; 193 ul = strtoul(str, &ep, 0); 194 if (*str != '\0' && *ep == '\0') { 195 if ((errno == ERANGE && ul == ULONG_MAX) || ul > SIZE_T_MAX) 196 errx(1, "%s number `%s' too big", what, str); 197 *res = (size_t)ul; 198 return 1; 199 } else if (badnum) 200 errx(1, "Bad %s number `%s'", what, str); 201 202 return 0; 203} 204 205/* 206 * Read up characters from the passed input file, forming lines, 207 * doing ^H processing, expanding tabs, stripping trailing blanks, 208 * and sending each line down for analysis. 209 */ 210static void 211fmt(FILE *fi) 212{ 213 struct buffer lbuf, cbuf; 214 wchar_t *cp, *cp2; 215 wint_t c; 216 int add_space; 217 size_t len, col, i; 218 219 if (center) { 220 for (;;) { 221 cp = fgetwln(fi, &len); 222 if (!cp) 223 return; 224 225 /* skip over leading space */ 226 while (len > 0) { 227 if (!iswspace(*cp)) 228 break; 229 cp++; 230 len--; 231 } 232 233 /* clear trailing space */ 234 while (len > 0) { 235 if (!iswspace((unsigned char)cp[len-1])) 236 break; 237 len--; 238 } 239 240 if (len == 0) { 241 /* blank line */ 242 (void)putwchar(L'\n'); 243 continue; 244 } 245 246 if (goal_length > len) { 247 for (i = 0; i < (goal_length - len) / 2; i++) { 248 (void)putwchar(L' '); 249 } 250 } 251 for (i = 0; i < len; i++) { 252 (void)putwchar(cp[i]); 253 } 254 (void)putwchar(L'\n'); 255 } 256 } 257 258 buf_init(&lbuf); 259 buf_init(&cbuf); 260 c = getwc(fi); 261 262 while (c != WEOF) { 263 /* 264 * Collect a line, doing ^H processing. 265 * Leave tabs for now. 266 */ 267 buf_reset(&lbuf); 268 while (c != '\n' && c != WEOF) { 269 if (c == '\b') { 270 (void)buf_unputc(&lbuf); 271 c = getwc(fi); 272 continue; 273 } 274 if(!(iswprint(c) || c == '\t' || c >= 160)) { 275 c = getwc(fi); 276 continue; 277 } 278 buf_putc(&lbuf, c); 279 c = getwc(fi); 280 } 281 buf_putc(&lbuf, '\0'); 282 (void)buf_unputc(&lbuf); 283 add_space = c != WEOF; 284 285 /* 286 * Expand tabs on the way. 287 */ 288 col = 0; 289 cp = lbuf.bptr; 290 buf_reset(&cbuf); 291 while ((c = *cp++) != '\0') { 292 if (c != '\t') { 293 col++; 294 buf_putc(&cbuf, c); 295 continue; 296 } 297 do { 298 buf_putc(&cbuf, ' '); 299 col++; 300 } while ((col & 07) != 0); 301 } 302 303 /* 304 * Swipe trailing blanks from the line. 305 */ 306 for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--) 307 continue; 308 cbuf.ptr = cp2 + 1; 309 buf_putc(&cbuf, '\0'); 310 (void)buf_unputc(&cbuf); 311 prefix(&cbuf, add_space); 312 if (c != WEOF) 313 c = getwc(fi); 314 } 315 buf_end(&cbuf); 316 buf_end(&lbuf); 317} 318 319/* 320 * Take a line devoid of tabs and other garbage and determine its 321 * blank prefix. If the indent changes, call for a linebreak. 322 * If the input line is blank, echo the blank line on the output. 323 * Finally, if the line minus the prefix is a mail header, try to keep 324 * it on a line by itself. 325 */ 326static void 327prefix(const struct buffer *buf, int add_space) 328{ 329 const wchar_t *cp; 330 const wchar_t **hp; 331 size_t np; 332 int h; 333 334 if (buf->ptr == buf->bptr) { 335 oflush(); 336 (void)putwchar(L'\n'); 337 return; 338 } 339 for (cp = buf->bptr; *cp == ' '; cp++) 340 continue; 341 np = cp - buf->bptr; 342 343 /* 344 * The following horrible expression attempts to avoid linebreaks 345 * when the indent changes due to a paragraph. 346 */ 347 if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8)) 348 oflush(); 349 if (!raw) { 350 if ((h = ishead(cp)) != 0) { 351 oflush(); 352 mark = lineno; 353 } 354 if (lineno - mark < 3 && lineno - mark > 0) 355 for (hp = &headnames[0]; *hp != NULL; hp++) 356 if (ispref(*hp, cp)) { 357 h = 1; 358 oflush(); 359 break; 360 } 361 if (!h && (h = (*cp == '.'))) 362 oflush(); 363 } else 364 h = 0; 365 pfx = np; 366 if (h) { 367 pack(cp, (size_t)(buf->ptr - cp)); 368 oflush(); 369 } else 370 split(cp, add_space); 371 lineno++; 372} 373 374/* 375 * Split up the passed line into output "words" which are 376 * maximal strings of non-blanks with the blank separation 377 * attached at the end. Pass these words along to the output 378 * line packer. 379 */ 380static void 381split(const wchar_t line[], int add_space) 382{ 383 const wchar_t *cp; 384 struct buffer word; 385 size_t wlen; 386 387 buf_init(&word); 388 cp = line; 389 while (*cp) { 390 buf_reset(&word); 391 wlen = 0; 392 393 /* 394 * Collect a 'word,' allowing it to contain escaped white 395 * space. 396 */ 397 while (*cp && *cp != ' ') { 398 if (*cp == '\\' && iswspace(cp[1])) 399 buf_putc(&word, *cp++); 400 buf_putc(&word, *cp++); 401 wlen++; 402 } 403 404 /* 405 * Guarantee a space at end of line. Two spaces after end of 406 * sentence punctuation. 407 */ 408 if (*cp == '\0' && add_space) { 409 buf_putc(&word, ' '); 410 if (strchr(".:!", cp[-1])) 411 buf_putc(&word, ' '); 412 } 413 while (*cp == ' ') 414 buf_putc(&word, *cp++); 415 416 buf_putc(&word, '\0'); 417 (void)buf_unputc(&word); 418 419 pack(word.bptr, wlen); 420 } 421 buf_end(&word); 422} 423 424/* 425 * Output section. 426 * Build up line images from the words passed in. Prefix 427 * each line with correct number of blanks. 428 * 429 * At the bottom of this whole mess, leading tabs are reinserted. 430 */ 431 432/* 433 * Pack a word onto the output line. If this is the beginning of 434 * the line, push on the appropriately-sized string of blanks first. 435 * If the word won't fit on the current line, flush and begin a new 436 * line. If the word is too long to fit all by itself on a line, 437 * just give it its own and hope for the best. 438 * 439 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the 440 * goal length, take it. If not, then check to see if the line 441 * will be over the max length; if so put the word on the next 442 * line. If not, check to see if the line will be closer to the 443 * goal length with or without the word and take it or put it on 444 * the next line accordingly. 445 */ 446 447static void 448pack(const wchar_t *word, size_t wlen) 449{ 450 const wchar_t *cp; 451 size_t s, t; 452 453 if (outbuf.bptr == outbuf.ptr) 454 leadin(); 455 /* 456 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the 457 * length of the line before the word is added; t is now the length 458 * of the line after the word is added 459 */ 460 s = outbuf.ptr - outbuf.bptr; 461 t = wlen + s; 462 if ((t <= goal_length) || ((t <= max_length) && 463 (s <= goal_length) && (t - goal_length <= goal_length - s))) { 464 /* 465 * In like flint! 466 */ 467 for (cp = word; *cp;) 468 buf_putc(&outbuf, *cp++); 469 return; 470 } 471 if (s > pfx) { 472 oflush(); 473 leadin(); 474 } 475 for (cp = word; *cp;) 476 buf_putc(&outbuf, *cp++); 477} 478 479/* 480 * If there is anything on the current output line, send it on 481 * its way. Reset outbuf. 482 */ 483static void 484oflush(void) 485{ 486 if (outbuf.bptr == outbuf.ptr) 487 return; 488 buf_putc(&outbuf, '\0'); 489 (void)buf_unputc(&outbuf); 490 tabulate(&outbuf); 491 buf_reset(&outbuf); 492} 493 494/* 495 * Take the passed line buffer, insert leading tabs where possible, and 496 * output on standard output (finally). 497 */ 498static void 499tabulate(struct buffer *buf) 500{ 501 wchar_t *cp; 502 size_t b, t; 503 504 /* 505 * Toss trailing blanks in the output line. 506 */ 507 for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--) 508 continue; 509 *++cp = '\0'; 510 511 /* 512 * Count the leading blank space and tabulate. 513 */ 514 for (cp = buf->bptr; *cp == ' '; cp++) 515 continue; 516 b = cp - buf->bptr; 517 t = b / 8; 518 b = b % 8; 519 if (t > 0) 520 do 521 (void)putwchar(L'\t'); 522 while (--t); 523 if (b > 0) 524 do 525 (void)putwchar(L' '); 526 while (--b); 527 while (*cp) 528 (void)putwchar(*cp++); 529 (void)putwchar(L'\n'); 530} 531 532/* 533 * Initialize the output line with the appropriate number of 534 * leading blanks. 535 */ 536static void 537leadin(void) 538{ 539 size_t b; 540 541 buf_reset(&outbuf); 542 543 for (b = 0; b < pfx; b++) 544 buf_putc(&outbuf, ' '); 545} 546 547/* 548 * Is s1 a prefix of s2?? 549 */ 550static int 551ispref(const wchar_t *s1, const wchar_t *s2) 552{ 553 554 while (*s1++ == *s2) 555 continue; 556 return *s1 == '\0'; 557} 558