1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Case Larsen.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
#ifndef lint
/* Copyright notice kept as a string so it is present in the object file. */
static const char copyright[] =
    "@(#) Copyright (c) 1989, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
#endif
/* Revision-control identifier of this source file. */
static const char rcsid[] =
    "$FreeBSD: src/usr.bin/uniq/uniq.c,v 1.26 2004/09/14 12:01:18 tjr Exp $";
#endif /* not lint */
50
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
61
/* Number of wide characters allocated for each line buffer. */
#define	MAXLINELEN	(8 * 1024)

/* Output-mode flags, set to 1 by the -c, -d and -u options. */
int cflag;
int dflag;
int uflag;

/* Leading characters (-s) and fields (-f) excluded from comparison. */
int numchars;
int numfields;

/* How many times the current line has been seen again so far. */
int repeats;

FILE	*file(const char *name, const char *mode);
wchar_t	*uniq_getline(wchar_t *buf, size_t buflen, FILE *fp);
void	 show(FILE *ofp, wchar_t *str);
wchar_t	*skip(wchar_t *str);
void	 obsolete(char *argv[]);
static void	 usage(void);
int	 wcsicoll(wchar_t *s1, wchar_t *s2);
74
75int
76main (int argc, char *argv[])
77{
78	wchar_t *t1, *t2;
79	FILE *ifp, *ofp;
80	int ch;
81	wchar_t *prevline, *thisline;
82	char *p;
83	const char *ifn;
84	int iflag = 0, comp;
85
86	(void) setlocale(LC_ALL, "");
87
88	obsolete(argv);
89	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
90		switch (ch) {
91		case 'c':
92			cflag = 1;
93			break;
94		case 'd':
95			dflag = 1;
96			break;
97		case 'i':
98			iflag = 1;
99			break;
100		case 'f':
101			numfields = strtol(optarg, &p, 10);
102			if (numfields < 0 || *p)
103				errx(1, "illegal field skip value: %s", optarg);
104			break;
105		case 's':
106			numchars = strtol(optarg, &p, 10);
107			if (numchars < 0 || *p)
108				errx(1, "illegal character skip value: %s", optarg);
109			break;
110		case 'u':
111			uflag = 1;
112			break;
113		case '?':
114		default:
115			usage();
116	}
117
118	argc -= optind;
119	argv +=optind;
120
121	/* If no flags are set, default is -d -u. */
122	if (cflag) {
123		if (dflag || uflag)
124			usage();
125	} else if (!dflag && !uflag)
126		dflag = uflag = 1;
127
128	if (argc > 2)
129		usage();
130
131	ifp = stdin;
132	ifn = "stdin";
133	ofp = stdout;
134	if (argc > 0 && argv[0] && strcmp(argv[0], "-") != 0)
135		ifp = file(ifn = argv[0], "r");
136	if (argc > 1 && argv[1])
137		ofp = file(argv[1], "w");
138
139	prevline = malloc(MAXLINELEN * sizeof(*prevline));
140	thisline = malloc(MAXLINELEN * sizeof(*thisline));
141	if (prevline == NULL || thisline == NULL)
142		err(1, "malloc");
143
144	if (uniq_getline(prevline, MAXLINELEN, ifp) == NULL) {
145		if (ferror(ifp))
146			err(1, "%s", ifp == stdin ? "stdin" : argv[0]);
147		exit(0);
148	}
149	if (!cflag && uflag && dflag)
150		show(ofp, prevline);
151
152	while (uniq_getline(thisline, MAXLINELEN, ifp)) {
153		/* If requested get the chosen fields + character offsets. */
154		if (numfields || numchars) {
155			t1 = skip(thisline);
156			t2 = skip(prevline);
157		} else {
158			t1 = thisline;
159			t2 = prevline;
160		}
161
162		/* If different, print; set previous to new value. */
163		if (iflag)
164			comp = wcsicoll(t1, t2);
165		else
166			comp = wcscoll(t1, t2);
167
168		if (comp) {
169			if (cflag || !dflag || !uflag)
170				show(ofp, prevline);
171			t1 = prevline;
172			prevline = thisline;
173			if (!cflag && uflag && dflag)
174				show(ofp, prevline);
175			thisline = t1;
176			repeats = 0;
177		} else
178			++repeats;
179	}
180	if (ferror(ifp))
181		err(1, "%s", ifp == stdin ? "stdin" : argv[0]);
182	if (cflag || !dflag || !uflag)
183		show(ofp, prevline);
184	exit(0);
185}
186
/*
 * uniq_getline --
 *	Read one line from `fp' into `buf', which has room for `buflen'
 *	wide characters.  At most buflen - 2 characters are stored; the
 *	rest of an over-long line is read and discarded.  The newline is
 *	consumed but not stored, and the result is NUL-terminated whenever
 *	buflen is non-zero.
 *
 *	Returns `buf' if a line was read (an empty line counts), or NULL
 *	if end-of-file or a read error occurred before any character was
 *	read; the caller tells the two apart with ferror().
 */
wchar_t *
uniq_getline(wchar_t *buf, size_t buflen, FILE *fp)
{
	size_t bufpos, limit;
	wint_t ch;
	int seen;

	/* Computing the capacity up front keeps tiny buffers safe. */
	limit = buflen >= 2 ? buflen - 2 : 0;
	bufpos = 0;
	seen = 0;
	while ((ch = getwc(fp)) != WEOF && ch != '\n') {
		seen = 1;
		if (bufpos < limit)
			buf[bufpos++] = ch;
	}
	if (buflen != 0)
		buf[bufpos] = '\0';

	return (seen || ch == '\n' ? buf : NULL);
}
203
204/*
205 * show --
206 *	Output a line depending on the flags and number of repetitions
207 *	of the line.
208 */
209void
210show(FILE *ofp, wchar_t *str)
211{
212
213	if (cflag)
214		(void)fprintf(ofp, "%4d %ls\n", repeats + 1, str);
215	if ((dflag && repeats) || (uflag && !repeats))
216		(void)fprintf(ofp, "%ls\n", str);
217}
218
219wchar_t *
220skip(wchar_t *str)
221{
222	int nchars, nfields;
223
224	for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
225		while (iswblank(*str))
226			str++;
227		while (*str != '\0' && !iswblank(*str))
228			str++;
229	}
230	for (nchars = numchars; nchars-- && *str; ++str);
231	return(str);
232}
233
/*
 * file --
 *	fopen() wrapper: open `name' in the given `mode' and return the
 *	stream.  On failure print a diagnostic naming the file and exit
 *	with status 1, so the result is never NULL.
 */
FILE *
file(const char *name, const char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (!stream)
		err(1, "%s", name);
	return (stream);
}
243
/*
 * obsolete --
 *	Rewrite old-style numeric options in place: "-N" becomes "-fN"
 *	and "+N" becomes "-sN".  Scanning starts at argv[1] and stops at
 *	the first argument that begins with "--" or that begins with
 *	neither '-' nor '+'.  Replacement strings are malloc'ed and are
 *	never freed here.
 */
void
obsolete(char *argv[])
{
	char **slot;
	char *arg, *fresh;
	size_t arglen;

	for (slot = argv + 1; (arg = *slot) != NULL; slot++) {
		/* Stop at "--" or at anything that is not an option. */
		if (arg[0] == '-') {
			if (arg[1] == '-')
				return;
		} else if (arg[0] != '+')
			return;

		/* Only a digit after the sign marks the old syntax. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/* Room for the dash, the option letter and the argument. */
		arglen = strlen(arg);
		fresh = malloc(arglen + 3);
		if (fresh == NULL)
			err(1, "malloc");
		fresh[0] = '-';
		fresh[1] = arg[0] == '+' ? 's' : 'f';
		/* arglen bytes from arg + 1 are the digits plus the NUL. */
		(void)memcpy(fresh + 2, arg + 1, arglen);
		*slot = fresh;
	}
}
272
/*
 * usage --
 *	Write the synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
280
/*
 * wcsicoll --
 *	Case-insensitive wcscoll(): compare lower-cased copies of `s1'
 *	and `s2' under the current locale's collation order.  Returns a
 *	value less than, equal to or greater than zero, as wcscoll()
 *	does.  Neither argument is modified.
 *
 *	The copies are sized from the actual string lengths, so inputs
 *	of any length are handled.  Exits through err() if memory cannot
 *	be allocated.
 */
int
wcsicoll(wchar_t *s1, wchar_t *s2)
{
	size_t len1, len2, i;
	wchar_t *fold1, *fold2;
	int result;

	len1 = wcslen(s1);
	len2 = wcslen(s2);

	/* One allocation holds both folded copies and their terminators. */
	fold1 = malloc((len1 + len2 + 2) * sizeof(*fold1));
	if (fold1 == NULL)
		err(1, "malloc");
	fold2 = fold1 + len1 + 1;

	for (i = 0; i < len1; i++)
		fold1[i] = towlower(s1[i]);
	fold1[len1] = '\0';
	for (i = 0; i < len2; i++)
		fold2[i] = towlower(s2[i]);
	fold2[len2] = '\0';

	result = wcscoll(fold1, fold2);
	free(fold1);
	return (result);
}
294