1/*	$NetBSD: uniq.c,v 1.22 2019/04/23 17:35:10 christos Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Case Larsen.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36#ifndef lint
37__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
38 The Regents of the University of California.  All rights reserved.");
39#endif /* not lint */
40
41#ifndef lint
42#if 0
43static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
44#endif
45__RCSID("$NetBSD: uniq.c,v 1.22 2019/04/23 17:35:10 christos Exp $");
46#endif /* not lint */
47
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
55
56static int cflag, dflag, uflag;
57static int numchars, numfields, repeats;
58
59static FILE *file(const char *, const char *);
60static void show(FILE *, const char *);
61static const char *skip(const char *, size_t *);
62static void obsolete(char *[]);
63static void usage(void) __dead;
64
65int
66main (int argc, char *argv[])
67{
68	const char *prevp, *thisp;
69	FILE *ifp, *ofp;
70	int ch;
71	char *prevline, *thisline, *p;
72	size_t prevlinesize, thislinesize, psize;
73	size_t prevlen, thislen;
74
75	setprogname(argv[0]);
76	ifp = ofp = NULL;
77	obsolete(argv);
78	while ((ch = getopt(argc, argv, "cdf:s:u")) != -1)
79		switch (ch) {
80		case 'c':
81			cflag = 1;
82			break;
83		case 'd':
84			dflag = 1;
85			break;
86		case 'f':
87			numfields = strtol(optarg, &p, 10);
88			if (numfields < 0 || *p)
89				errx(1, "illegal field skip value: %s", optarg);
90			break;
91		case 's':
92			numchars = strtol(optarg, &p, 10);
93			if (numchars < 0 || *p)
94				errx(1, "illegal character skip value: %s",
95				    optarg);
96			break;
97		case 'u':
98			uflag = 1;
99			break;
100		case '?':
101		default:
102			usage();
103	}
104
105	argc -= optind;
106	argv +=optind;
107
108	switch(argc) {
109	case 0:
110		ifp = stdin;
111		ofp = stdout;
112		break;
113	case 1:
114		ifp = file(argv[0], "r");
115		ofp = stdout;
116		break;
117	case 2:
118		ifp = file(argv[0], "r");
119		ofp = file(argv[1], "w");
120		break;
121	default:
122		usage();
123	}
124
125	if ((p = fgetln(ifp, &psize)) == NULL)
126		return 0;
127	prevlinesize = prevlen = psize;
128	if ((prevline = malloc(prevlinesize + 1)) == NULL)
129		err(1, "malloc");
130	(void)memcpy(prevline, p, prevlinesize);
131	prevline[prevlinesize] = '\0';
132
133	if (numfields || numchars)
134		prevp = skip(prevline, &prevlen);
135	else
136		prevp = prevline;
137
138	thislinesize = psize;
139	if ((thisline = malloc(thislinesize + 1)) == NULL)
140		err(1, "malloc");
141
142	while ((p = fgetln(ifp, &psize)) != NULL) {
143		if (psize > thislinesize) {
144			if ((thisline = realloc(thisline, psize + 1)) == NULL)
145				err(1, "realloc");
146			thislinesize = psize;
147		}
148		thislen = psize;
149		(void)memcpy(thisline, p, psize);
150		thisline[psize] = '\0';
151
152		/* If requested get the chosen fields + character offsets. */
153		if (numfields || numchars) {
154			thisp = skip(thisline, &thislen);
155		} else {
156			thisp = thisline;
157		}
158
159		/* If different, print; set previous to new value. */
160		if (thislen != prevlen || strcmp(thisp, prevp)) {
161			char *t;
162			size_t ts;
163
164			show(ofp, prevline);
165			t = prevline;
166			prevline = thisline;
167			thisline = t;
168			ts = prevlinesize;
169			prevlinesize = thislinesize;
170			thislinesize = ts;
171			prevp = thisp;
172			prevlen = thislen;
173			repeats = 0;
174		} else
175			++repeats;
176	}
177	show(ofp, prevline);
178	free(prevline);
179	free(thisline);
180	return 0;
181}
182
183/*
184 * show --
185 *	Output a line depending on the flags and number of repetitions
186 *	of the line.
187 */
188static void
189show(FILE *ofp, const char *str)
190{
191
192	if ((dflag && repeats == 0) || (uflag && repeats > 0))
193		return;
194	if (cflag) {
195		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
196	} else {
197		(void)fprintf(ofp, "%s", str);
198	}
199}
200
201static const char *
202skip(const char *str, size_t *linesize)
203{
204	int infield, nchars, nfields;
205	size_t ls = *linesize;
206
207	for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls)
208		if (isspace((unsigned char)*str)) {
209			if (infield) {
210				infield = 0;
211				--nfields;
212			}
213		} else if (!infield)
214			infield = 1;
215	for (nchars = numchars; nchars-- && *str; ++str, --ls)
216		continue;
217	*linesize = ls;
218	return str;
219}
220
/*
 * file --
 *	Open `name' with fopen(3) in the given mode and return the stream.
 *	On failure, exit with status 1 after printing a diagnostic that
 *	names the file.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *stream = fopen(name, mode);

	if (stream == NULL)
		err(1, "%s", name);
	return stream;
}
230
/*
 * obsolete --
 *	Rewrite old-style "-N" and "+N" arguments in place as "-fN" and
 *	"-sN" so that getopt(3) can handle them.  Scanning starts at
 *	argv[1] and stops at the end of the vector, at the first argument
 *	that begins with neither '-' nor '+', or at an argument beginning
 *	with "--".
 *
 *	Each replacement string is heap-allocated and stored in argv; it is
 *	never freed, since argv is used for the rest of the program's life.
 *	Exits with status 1 if memory cannot be allocated.
 */
static void
obsolete(char *argv[])
{
	char *ap, *p;
	size_t size;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Allocate space for
		 * the dash, the option letter, the digits that followed the
		 * original sign and the terminating NUL: strlen(ap) + 2.
		 * The allocation is checked directly rather than relying on
		 * how a formatting-and-allocating routine reports failure.
		 */
		size = strlen(ap) + 2;
		if ((p = malloc(size)) == NULL)
			err(1, "malloc");
		(void)snprintf(p, size, "-%c%s",
		    ap[0] == '+' ? 's' : 'f', ap + 1);
		*argv = p;
	}
}
256
/*
 * usage --
 *	Print the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-cdu] [-f fields] [-s chars] "
	    "[input_file [output_file]]\n",
	    progname);
	exit(1);
}
264