comm.c revision 179374
1/*
2 * Copyright (c) 1989, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Case Larsen.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1989, 1993, 1994\n\
40	The Regents of the University of California.  All rights reserved.\n";
41#endif
42
43#if 0
44#ifndef lint
45static char sccsid[] = "From: @(#)comm.c	8.4 (Berkeley) 5/4/95";
46#endif
47#endif
48
49#include <sys/cdefs.h>
50__FBSDID("$FreeBSD: head/usr.bin/comm/comm.c 179374 2008-05-28 14:13:35Z ghelmer $");
51
52#include <err.h>
53#include <limits.h>
54#include <locale.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58#include <unistd.h>
59#include <wchar.h>
60#include <wctype.h>
61
/* Initial capacity, in wide characters, of the line buffers. */
#define	MAXLINELEN	(LINE_MAX + 1)

/*
 * Column prefixes.  main() hands these out in order to the columns that
 * are actually printed: the first printed column gets no indentation, the
 * second one tab and the third two tabs.
 */
const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };

FILE		*file(const char *name);
wchar_t		*getline(wchar_t *buf, size_t *buflen, FILE *fp);
void		 show(FILE *fp, const char *fn, const wchar_t *offset,
		    wchar_t *buf, size_t *buflen);
int		 wcsicoll(const wchar_t *s1, const wchar_t *s2);
static void	 usage(void);
71
72int
73main(int argc, char *argv[])
74{
75	int comp, read1, read2;
76	int ch, flag1, flag2, flag3, iflag;
77	FILE *fp1, *fp2;
78	const wchar_t *col1, *col2, *col3;
79	size_t line1len, line2len;
80	wchar_t *line1, *line2;
81	const wchar_t **p;
82
83	flag1 = flag2 = flag3 = 1;
84	iflag = 0;
85
86 	line1len = MAXLINELEN;
87 	line2len = MAXLINELEN;
88 	line1 = malloc(line1len * sizeof(*line1));
89 	line2 = malloc(line2len * sizeof(*line2));
90	if (line1 == NULL || line2 == NULL)
91		err(1, "malloc");
92
93	(void) setlocale(LC_ALL, "");
94
95	while ((ch = getopt(argc, argv, "123i")) != -1)
96		switch(ch) {
97		case '1':
98			flag1 = 0;
99			break;
100		case '2':
101			flag2 = 0;
102			break;
103		case '3':
104			flag3 = 0;
105			break;
106		case 'i':
107			iflag = 1;
108			break;
109		case '?':
110		default:
111			usage();
112		}
113	argc -= optind;
114	argv += optind;
115
116	if (argc != 2)
117		usage();
118
119	fp1 = file(argv[0]);
120	fp2 = file(argv[1]);
121
122	/* for each column printed, add another tab offset */
123	p = tabs;
124	col1 = col2 = col3 = NULL;
125	if (flag1)
126		col1 = *p++;
127	if (flag2)
128		col2 = *p++;
129	if (flag3)
130		col3 = *p;
131
132	for (read1 = read2 = 1;;) {
133		/* read next line, check for EOF */
134		if (read1) {
135			line1 = getline(line1, &line1len, fp1);
136			if (line1 == NULL && ferror(fp1))
137				err(1, "%s", argv[0]);
138		}
139		if (read2) {
140			line2 = getline(line2, &line2len, fp2);
141			if (line2 == NULL && ferror(fp2))
142				err(1, "%s", argv[1]);
143		}
144
145		/* if one file done, display the rest of the other file */
146		if (line1 == NULL) {
147			if (line2 != NULL && col2 != NULL)
148				show(fp2, argv[1], col2, line2, &line2len);
149			break;
150		}
151		if (line2 == NULL) {
152			if (line1 != NULL && col1 != NULL)
153				show(fp1, argv[0], col1, line1, &line1len);
154			break;
155		}
156
157		/* lines are the same */
158		if(iflag)
159			comp = wcsicoll(line1, line2);
160		else
161			comp = wcscoll(line1, line2);
162
163		if (!comp) {
164			read1 = read2 = 1;
165			if (col3 != NULL)
166				(void)printf("%ls%ls", col3, line1);
167			continue;
168		}
169
170		/* lines are different */
171		if (comp < 0) {
172			read1 = 1;
173			read2 = 0;
174			if (col1 != NULL)
175				(void)printf("%ls%ls", col1, line1);
176		} else {
177			read1 = 0;
178			read2 = 1;
179			if (col2 != NULL)
180				(void)printf("%ls%ls", col2, line2);
181		}
182	}
183	exit(0);
184}
185
/*
 * Read the next line of wide characters from fp.
 *
 * buf must be NULL or a buffer obtained from malloc()/realloc() that can
 * hold *buflen wide characters.  The buffer is grown as needed; *buflen is
 * updated and the (possibly moved) buffer is returned, so the caller must
 * continue with the returned pointer.
 *
 * The returned line is always NUL-terminated and always ends in a newline:
 * if the input ends without one, a newline is appended so that the final
 * line compares and prints like every other line.
 *
 * Returns NULL when no character could be read (end of file or read
 * error; the caller distinguishes the two with ferror()).  In that case
 * buf is left untouched and is not freed.  Running out of memory is fatal
 * and terminates the program instead of being reported as end of file.
 */
wchar_t *
getline(wchar_t *buf, size_t *buflen, FILE *fp)
{
	size_t bufpos, newlen;
	wchar_t *newbuf;
	wint_t ch;

	bufpos = 0;
	while ((ch = getwc(fp)) != WEOF) {
		/*
		 * Reserve three slots: this character, a newline that may
		 * have to be synthesized at end of file, and the NUL.
		 */
		if (bufpos + 3 > *buflen) {
			newlen = *buflen;
			while (bufpos + 3 > newlen) {
				/* Refuse sizes whose byte count would wrap. */
				if (newlen > ((size_t)-1 / sizeof(*buf)) / 2)
					errx(1, "line too long");
				newlen = (newlen == 0) ? bufpos + 3 : newlen * 2;
			}
			newbuf = realloc(buf, newlen * sizeof(*buf));
			if (newbuf == NULL)
				err(1, "realloc");
			buf = newbuf;
			*buflen = newlen;
		}
		buf[bufpos++] = ch;
		if (ch == L'\n')
			break;
	}

	/* Nothing read at all: end of file or read error. */
	if (bufpos == 0)
		return (NULL);

	/* Input stopped in the middle of a line: complete it. */
	if (buf[bufpos - 1] != L'\n')
		buf[bufpos++] = L'\n';
	buf[bufpos] = L'\0';

	return (buf);
}
209
/*
 * Print the line already held in buf, then every remaining line of fp,
 * each preceded by the column prefix offset.  buf and buflen are the
 * line buffer and its capacity as used by getline().  Once getline()
 * stops returning lines, a read error on fp is reported under the file
 * name fn and terminates the program.
 */
void
show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen)
{

	for (;;) {
		(void)printf("%ls%ls", offset, buf);
		buf = getline(buf, buflen, fp);
		if (buf == NULL)
			break;
	}

	/* NULL means either end of file or a read error; tell them apart. */
	if (ferror(fp))
		err(1, "%s", fn);
}
220
/*
 * Return a stream for reading the operand name.  The name "-" selects
 * standard input; any other name is opened for reading, and a failure to
 * open it is reported and terminates the program.
 */
FILE *
file(const char *name)
{
	FILE *stream;

	if (strcmp(name, "-") == 0) {
		stream = stdin;
	} else {
		stream = fopen(name, "r");
		if (stream == NULL)
			err(1, "%s", name);
	}
	return (stream);
}
233
/*
 * Write the usage synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	(void)fputs("usage: comm [-123i] file1 file2\n", stderr);
	exit(1);
}
240
/*
 * Scratch buffers for wcsicoll().  They hold the lowercased copies of the
 * two operands and are kept between calls so that they are only
 * reallocated when a longer string shows up.
 */
static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;

/*
 * Store a lowercased copy of s in the scratch buffer *bufp, whose capacity
 * in wide characters is *buflenp, growing the buffer first if s (including
 * its terminating NUL) does not fit.  Returns the buffer.  Allocation
 * failure is fatal.
 */
static const wchar_t *
wcsicoll_fold(const wchar_t *s, wchar_t **bufp, size_t *buflenp)
{
	size_t need, newlen;
	wchar_t *newbuf, *p;

	need = wcslen(s) + 1;
	if (need > *buflenp) {
		/* Grow geometrically so repeated calls stay cheap. */
		newlen = (*buflenp == 0) ? need : *buflenp;
		while (newlen < need) {
			/* Refuse sizes whose byte count would wrap. */
			if (newlen > ((size_t)-1 / sizeof(**bufp)) / 2)
				errx(1, "line too long");
			newlen *= 2;
		}
		newbuf = realloc(*bufp, newlen * sizeof(**bufp));
		if (newbuf == NULL)
			err(1, "realloc");
		*bufp = newbuf;
		*buflenp = newlen;
	}

	for (p = *bufp; *s != L'\0'; s++)
		*p++ = towlower(*s);
	*p = L'\0';

	return (*bufp);
}

/*
 * Case-insensitive collation: compare s1 and s2 with wcscoll() after
 * converting every character of both strings with towlower().
 *
 * Returns the wcscoll() result for the lowercased copies (negative, zero
 * or positive).  The copies live in file-scope scratch buffers that are
 * reused by later calls.
 */
int
wcsicoll(const wchar_t *s1, const wchar_t *s2)
{
	const wchar_t *lower1, *lower2;

	lower1 = wcsicoll_fold(s1, &wcsicoll_l1_buf, &wcsicoll_l1_buflen);
	lower2 = wcsicoll_fold(s2, &wcsicoll_l2_buf, &wcsicoll_l2_buflen);

	return (wcscoll(lower1, lower2));
}
289