1231071Sed/*-
2231071Sed * Copyright (c) 2012 Ed Schouten <ed@FreeBSD.org>
3231071Sed * All rights reserved.
4231071Sed *
5231071Sed * Redistribution and use in source and binary forms, with or without
6231071Sed * modification, are permitted provided that the following conditions
7231071Sed * are met:
8231071Sed * 1. Redistributions of source code must retain the above copyright
9231071Sed *    notice, this list of conditions and the following disclaimer.
10231071Sed * 2. Redistributions in binary form must reproduce the above copyright
11231071Sed *    notice, this list of conditions and the following disclaimer in the
12231071Sed *    documentation and/or other materials provided with the distribution.
13231071Sed *
14231071Sed * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15231071Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16231071Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17231071Sed * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18231071Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19231071Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20231071Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21231071Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22231071Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23231071Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24231071Sed * SUCH DAMAGE.
25231071Sed */
26231071Sed
27231071Sed#include <sys/cdefs.h>
28231071Sed__FBSDID("$FreeBSD$");
29231071Sed
30231071Sed#include <ctype.h>
31231071Sed#include <stdbool.h>
32231071Sed#include <stdio.h>
33231071Sed#include <stdlib.h>
34231071Sed
/* Whitespace read from the input but not yet written to the output. */
static char *queue = NULL;
/* Number of bytes currently held in queue. */
static size_t queuelen = 0;
/* Number of bytes allocated for queue. */
static size_t queuesize = 0;
/* Tracked output column, counting tab stops every 8 columns. */
static off_t column = 0;
38231071Sed
39231071Sedstatic void
40231071Sedsavebyte(char c)
41231071Sed{
42231071Sed
43231071Sed	if (queuelen >= queuesize) {
44231071Sed		queuesize += 128;
45231071Sed		queue = realloc(queue, queuesize);
46231071Sed		if (queue == NULL) {
47231071Sed			perror("malloc");
48231071Sed			exit(1);
49231071Sed		}
50231071Sed	}
51231071Sed	queue[queuelen++] = c;
52231071Sed
53231071Sed	switch (c) {
54231071Sed	case '\n':
55231071Sed		column = 0;
56231071Sed		break;
57231071Sed	case ' ':
58231071Sed		column++;
59231071Sed		break;
60231071Sed	case '\t':
61231071Sed		column = (column / 8 + 1) * 8;
62231071Sed		break;
63231071Sed	}
64231071Sed}
65231071Sed
66231071Sedstatic bool
67231071Sedpeekbyte(size_t back, char c)
68231071Sed{
69231071Sed
70231071Sed	return (queuelen >= back && queue[queuelen - back] == c);
71231071Sed}
72231071Sed
73231071Sedstatic void
74231071Sedsavewhite(char c, bool leading)
75231071Sed{
76231071Sed	off_t ncolumn;
77231071Sed
78231071Sed	switch (c) {
79231071Sed	case '\n':
80231071Sed		if (leading) {
81231071Sed			/* Remove empty lines before input. */
82231071Sed			queuelen = 0;
83231071Sed			column = 0;
84231071Sed		} else {
85231071Sed			/* Remove trailing whitespace. */
86231071Sed			while (peekbyte(1, ' ') || peekbyte(1, '\t'))
87231071Sed				queuelen--;
88231071Sed			/* Remove redundant empty lines. */
89231071Sed			if (peekbyte(2, '\n') && peekbyte(1, '\n'))
90231071Sed				return;
91231071Sed			savebyte('\n');
92231071Sed		}
93231071Sed		break;
94231071Sed	case ' ':
95231071Sed		savebyte(' ');
96231071Sed		break;
97231071Sed	case '\t':
98299826Spfg		/* Convert preceding spaces to tabs. */
99231071Sed		ncolumn = (column / 8 + 1) * 8;
100231071Sed		while (peekbyte(1, ' ')) {
101231071Sed			queuelen--;
102231071Sed			column--;
103231071Sed		}
104231071Sed		while (column < ncolumn)
105231071Sed			savebyte('\t');
106231071Sed		break;
107231071Sed	}
108231071Sed}
109231071Sed
110231071Sedstatic void
111231071Sedprintwhite(void)
112231071Sed{
113231098Sed	off_t i;
114231071Sed
115231098Sed	/* Merge spaces at the start of a sentence to tabs if possible. */
116231098Sed	if ((column % 8) == 0) {
117231098Sed		for (i = 0; i < column; i++)
118231098Sed			if (!peekbyte(i + 1, ' '))
119231098Sed				break;
120231098Sed		if (i == column) {
121231098Sed			queuelen -= column;
122231098Sed			for (i = 0; i < column; i += 8)
123231098Sed				queue[queuelen++] = '\t';
124231098Sed		}
125231098Sed	}
126231098Sed
127231071Sed	if (fwrite(queue, 1, queuelen, stdout) != queuelen) {
128231071Sed		perror("write");
129231071Sed		exit(1);
130231071Sed	}
131231071Sed	queuelen = 0;
132231071Sed}
133231071Sed
/*
 * Read the next byte from standard input.
 *
 * Returns the byte as an unsigned char converted to int, or EOF once
 * no more input is available.  The result is an int rather than a char
 * on purpose: narrowing it to char makes byte 0xFF compare equal to EOF
 * where char is signed, and makes EOF impossible to detect where char
 * is unsigned.  A read error is reported with perror() and ends the
 * program with status 1.
 */
static int
readchar(void)
{
	int c;

	c = getchar();
	if (c == EOF && ferror(stdin)) {
		perror("read");
		exit(1);
	}
	return (c);
}
146231071Sed
147231071Sedstatic void
148231071Sedwritechar(char c)
149231071Sed{
150231071Sed
151231071Sed	if (putchar(c) == EOF) {
152231071Sed		perror("write");
153231071Sed		exit(1);
154231071Sed	}
155231071Sed	/* XXX: Multi-byte characters. */
156231071Sed	column++;
157231071Sed}
158231071Sed
/*
 * Copy standard input to standard output.  Whitespace characters are
 * handed to savewhite() and only written, via printwhite(), once a
 * non-whitespace character follows.  Whitespace still pending at end
 * of input is therefore never written.  If at least one non-whitespace
 * character was written, a final newline is written after the last one.
 *
 * Returns 0 on success; read and write failures end the program with
 * status 1.
 */
int
main(void)
{
	int c;
	bool leading = true;

	while ((c = readchar()) != EOF) {
		/*
		 * isspace() is only defined for EOF and values
		 * representable as unsigned char, so normalise c before
		 * classifying it.
		 */
		if (isspace((unsigned char)c)) {
			/* Save whitespace. */
			savewhite(c, leading);
		} else {
			/* Reprint whitespace and print regular character. */
			printwhite();
			writechar(c);
			leading = false;
		}
	}
	/* Terminate non-empty files with a newline. */
	if (!leading)
		writechar('\n');
	/*
	 * Output may still be sitting in the stdio buffer.  Flush it here
	 * so a failed write is reported instead of being lost on exit.
	 */
	if (fflush(stdout) != 0) {
		perror("write");
		exit(1);
	}
	return (0);
}
181