uniq.c revision 28503
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Case Larsen.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
/*
 * Copyright notice kept as a string constant; compiled out when `lint' is
 * defined.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/*
 * Version identification strings, also compiled out when `lint' is defined.
 * The SCCS string is disabled by the #if 0, so only rcsid is ever compiled.
 * NOTE(review): "$Id$" looks like a revision-control keyword that is
 * expanded on checkout -- confirm against the repository setup.
 */
#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
#endif
static const char rcsid[] =
	"$Id$";
#endif /* not lint */
50
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
57
/* Size in bytes of each of the two line buffers that main() passes to fgets(). */
#define	MAXLINELEN	(8 * 1024)

/* Output mode flags, set by main() from the -c, -d and -u options. */
int cflag;
int dflag;
int uflag;

int numfields;		/* -f value: leading fields skip() passes over */
int numchars;		/* -s value: characters skip() passes over after that */
int repeats;		/* times the pending line has been seen again */

FILE	*file __P((char *, char *));
void	 show __P((FILE *, char *));
char	*skip __P((char *));
void	 obsolete __P((char *[]));
static void	 usage __P((void));
68
69int
70main (argc, argv)
71	int argc;
72	char *argv[];
73{
74	register char *t1, *t2;
75	FILE *ifp, *ofp;
76	int ch;
77	char *prevline, *thisline, *p;
78
79	obsolete(argv);
80	while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
81		switch (ch) {
82		case '-':
83			--optind;
84			goto done;
85		case 'c':
86			cflag = 1;
87			break;
88		case 'd':
89			dflag = 1;
90			break;
91		case 'f':
92			numfields = strtol(optarg, &p, 10);
93			if (numfields < 0 || *p)
94				errx(1, "illegal field skip value: %s", optarg);
95			break;
96		case 's':
97			numchars = strtol(optarg, &p, 10);
98			if (numchars < 0 || *p)
99				errx(1, "illegal character skip value: %s", optarg);
100			break;
101		case 'u':
102			uflag = 1;
103			break;
104		case '?':
105		default:
106			usage();
107	}
108
109done:	argc -= optind;
110	argv +=optind;
111
112	/* If no flags are set, default is -d -u. */
113	if (cflag) {
114		if (dflag || uflag)
115			usage();
116	} else if (!dflag && !uflag)
117		dflag = uflag = 1;
118
119	switch(argc) {
120	case 0:
121		ifp = stdin;
122		ofp = stdout;
123		break;
124	case 1:
125		ifp = file(argv[0], "r");
126		ofp = stdout;
127		break;
128	case 2:
129		ifp = file(argv[0], "r");
130		ofp = file(argv[1], "w");
131		break;
132	default:
133		usage();
134	}
135
136	prevline = malloc(MAXLINELEN);
137	thisline = malloc(MAXLINELEN);
138	if (prevline == NULL || thisline == NULL)
139		errx(1, "malloc");
140
141	if (fgets(prevline, MAXLINELEN, ifp) == NULL)
142		exit(0);
143
144	while (fgets(thisline, MAXLINELEN, ifp)) {
145		/* If requested get the chosen fields + character offsets. */
146		if (numfields || numchars) {
147			t1 = skip(thisline);
148			t2 = skip(prevline);
149		} else {
150			t1 = thisline;
151			t2 = prevline;
152		}
153
154		/* If different, print; set previous to new value. */
155		if (strcmp(t1, t2)) {
156			show(ofp, prevline);
157			t1 = prevline;
158			prevline = thisline;
159			thisline = t1;
160			repeats = 0;
161		} else
162			++repeats;
163	}
164	show(ofp, prevline);
165	exit(0);
166}
167
168/*
169 * show --
170 *	Output a line depending on the flags and number of repetitions
171 *	of the line.
172 */
173void
174show(ofp, str)
175	FILE *ofp;
176	char *str;
177{
178
179	if (cflag && *str)
180		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
181	if ((dflag && repeats) || (uflag && !repeats))
182		(void)fprintf(ofp, "%s", str);
183}
184
185char *
186skip(str)
187	register char *str;
188{
189	register int infield, nchars, nfields;
190
191	for (nfields = numfields, infield = 0; nfields && *str; ++str)
192		if (isspace(*str)) {
193			if (infield) {
194				infield = 0;
195				--nfields;
196			}
197		} else if (!infield)
198			infield = 1;
199	for (nchars = numchars; nchars-- && *str; ++str);
200	return(str);
201}
202
/*
 * file --
 *	Open `name' with fopen() using `mode' and return the stream.  If the
 *	open fails, err() prints the name with the system error text and
 *	exits with status 1.
 */
FILE *
file(name, mode)
	char *name, *mode;
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
213
/*
 * obsolete --
 *	Rewrite historic numeric options in argv in place so that getopt()
 *	can parse them: "-N" becomes "-fN" and "+N" becomes "-sN".
 *
 *	Scanning starts at argv[1] and stops at the terminating NULL, at the
 *	first argument that begins with neither '-' nor '+', or at the first
 *	argument that begins with "--".  Arguments whose second character is
 *	not a digit are left untouched.  Each replacement string is
 *	allocated with malloc() and is not freed here.
 */
void
obsolete(argv)
	char *argv[];
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/*
		 * The <ctype.h> functions take an int holding an unsigned
		 * char value (or EOF); a plain char may be negative.
		 */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}
243
/*
 * usage --
 *	Write the command synopsis to standard error and exit with status 1.
 */
static void
usage()
{
	(void)fputs(
	    "usage: uniq [-c | -du] [-f fields] [-s chars] [input [output]]\n",
	    stderr);
	exit(1);
}
251