11590Srgrimes/*
21590Srgrimes * Copyright (c) 1989, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * This code is derived from software contributed to Berkeley by
61590Srgrimes * Case Larsen.
71590Srgrimes *
81590Srgrimes * Redistribution and use in source and binary forms, with or without
91590Srgrimes * modification, are permitted provided that the following conditions
101590Srgrimes * are met:
111590Srgrimes * 1. Redistributions of source code must retain the above copyright
121590Srgrimes *    notice, this list of conditions and the following disclaimer.
131590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer in the
151590Srgrimes *    documentation and/or other materials provided with the distribution.
161590Srgrimes * 4. Neither the name of the University nor the names of its contributors
171590Srgrimes *    may be used to endorse or promote products derived from this software
181590Srgrimes *    without specific prior written permission.
191590Srgrimes *
201590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301590Srgrimes * SUCH DAMAGE.
311590Srgrimes */
321590Srgrimes
331590Srgrimes#ifndef lint
3428503Scharnierstatic const char copyright[] =
351590Srgrimes"@(#) Copyright (c) 1989, 1993\n\
361590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
371590Srgrimes#endif /* not lint */
381590Srgrimes
391590Srgrimes#ifndef lint
4028503Scharnier#if 0
4123690Speterstatic char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
4228503Scharnier#endif
4328503Scharnierstatic const char rcsid[] =
4450477Speter  "$FreeBSD$";
451590Srgrimes#endif /* not lint */
461590Srgrimes
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
591590Srgrimes
/* Option flags; main() sets each to 1 when the matching switch is seen. */
int cflag;		/* -c */
int dflag;		/* -d */
int uflag;		/* -u */
int iflag;		/* -i */

/* Values of -s and -f as parsed by main(); read by skip() and convert(). */
int numchars;
int numfields;

/* Count of consecutive lines that compared equal to the held line. */
int repeats;

FILE	*file(const char *name, const char *mode);
wchar_t	*convert(const char *str);
int	 inlcmp(const char *s1, const char *s2);
void	 show(FILE *ofp, const char *str);
wchar_t	*skip(wchar_t *str);
void	 obsolete(char *argv[]);
static void	 usage(void);
701590Srgrimes
711590Srgrimesint
72102944Sdwmalonemain (int argc, char *argv[])
731590Srgrimes{
74204876Sache	wchar_t *tprev, *tthis;
751590Srgrimes	FILE *ifp, *ofp;
76204803Sache	int ch, comp;
77204876Sache	size_t prevbuflen, thisbuflen, b1;
78204803Sache	char *prevline, *thisline, *p;
79131502Stjr	const char *ifn;
801590Srgrimes
8195030Sache	(void) setlocale(LC_ALL, "");
8234323Sache
831590Srgrimes	obsolete(argv);
8497527Stjr	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
851590Srgrimes		switch (ch) {
861590Srgrimes		case 'c':
871590Srgrimes			cflag = 1;
881590Srgrimes			break;
891590Srgrimes		case 'd':
901590Srgrimes			dflag = 1;
911590Srgrimes			break;
9229207Sjoerg		case 'i':
9329207Sjoerg			iflag = 1;
9429207Sjoerg			break;
951590Srgrimes		case 'f':
961590Srgrimes			numfields = strtol(optarg, &p, 10);
971590Srgrimes			if (numfields < 0 || *p)
9828503Scharnier				errx(1, "illegal field skip value: %s", optarg);
991590Srgrimes			break;
1001590Srgrimes		case 's':
1011590Srgrimes			numchars = strtol(optarg, &p, 10);
1021590Srgrimes			if (numchars < 0 || *p)
10328503Scharnier				errx(1, "illegal character skip value: %s", optarg);
1041590Srgrimes			break;
1051590Srgrimes		case 'u':
1061590Srgrimes			uflag = 1;
1071590Srgrimes			break;
1081590Srgrimes		case '?':
1091590Srgrimes		default:
1101590Srgrimes			usage();
111169636Sjmallett		}
1121590Srgrimes
11397527Stjr	argc -= optind;
114169638Sjmallett	argv += optind;
1151590Srgrimes
1161590Srgrimes	/* If no flags are set, default is -d -u. */
1171590Srgrimes	if (cflag) {
1181590Srgrimes		if (dflag || uflag)
1191590Srgrimes			usage();
1201590Srgrimes	} else if (!dflag && !uflag)
1211590Srgrimes		dflag = uflag = 1;
1221590Srgrimes
12397529Stjr	if (argc > 2)
12497529Stjr		usage();
12597529Stjr
12697529Stjr	ifp = stdin;
127131502Stjr	ifn = "stdin";
12897529Stjr	ofp = stdout;
12997529Stjr	if (argc > 0 && strcmp(argv[0], "-") != 0)
130131502Stjr		ifp = file(ifn = argv[0], "r");
13197529Stjr	if (argc > 1)
1321590Srgrimes		ofp = file(argv[1], "w");
1331590Srgrimes
134204876Sache	prevbuflen = thisbuflen = 0;
135204876Sache	prevline = thisline = NULL;
1361590Srgrimes
137204876Sache	if (getline(&prevline, &prevbuflen, ifp) < 0) {
138169639Sjmallett		if (ferror(ifp))
139169638Sjmallett			err(1, "%s", ifn);
1401590Srgrimes		exit(0);
141131502Stjr	}
142204876Sache	tprev = convert(prevline);
143204803Sache
14499433Stjr	if (!cflag && uflag && dflag)
14599433Stjr		show(ofp, prevline);
1461590Srgrimes
147204876Sache	tthis = NULL;
148204876Sache	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
149204876Sache		if (tthis != NULL)
150204876Sache			free(tthis);
151204876Sache		tthis = convert(thisline);
1521590Srgrimes
153204803Sache		if (tthis == NULL && tprev == NULL)
154204876Sache			comp = inlcmp(thisline, prevline);
155204803Sache		else if (tthis == NULL || tprev == NULL)
156204803Sache			comp = 1;
15729207Sjoerg		else
158204803Sache			comp = wcscoll(tthis, tprev);
15929207Sjoerg
16029207Sjoerg		if (comp) {
161204803Sache			/* If different, print; set previous to new value. */
16299433Stjr			if (cflag || !dflag || !uflag)
16399433Stjr				show(ofp, prevline);
164204803Sache			p = prevline;
165204876Sache			b1 = prevbuflen;
1661590Srgrimes			prevline = thisline;
167204876Sache			prevbuflen = thisbuflen;
168204876Sache			if (tprev != NULL)
169204876Sache				free(tprev);
170204803Sache			tprev = tthis;
17199433Stjr			if (!cflag && uflag && dflag)
17299433Stjr				show(ofp, prevline);
173204803Sache			thisline = p;
174204876Sache			thisbuflen = b1;
175204876Sache			tthis = NULL;
1761590Srgrimes			repeats = 0;
1771590Srgrimes		} else
1781590Srgrimes			++repeats;
1791590Srgrimes	}
180131502Stjr	if (ferror(ifp))
181169638Sjmallett		err(1, "%s", ifn);
18299433Stjr	if (cflag || !dflag || !uflag)
18399433Stjr		show(ofp, prevline);
1841590Srgrimes	exit(0);
1851590Srgrimes}
1861590Srgrimes
187204876Sachewchar_t *
188204876Sacheconvert(const char *str)
18998545Stjr{
190204876Sache	size_t n;
191204876Sache	wchar_t *buf, *ret, *p;
19298545Stjr
193204876Sache	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
194204876Sache		return (NULL);
195204927Sache	if (SIZE_MAX / sizeof(*buf) < n + 1)
196204927Sache		errx(1, "conversion buffer length overflow");
197204876Sache	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
198204876Sache		err(1, "malloc");
199204876Sache	if (mbstowcs(buf, str, n + 1) != n)
200204876Sache		errx(1, "internal mbstowcs() error");
201204876Sache	/* The last line may not end with \n. */
202204876Sache	if (n > 0 && buf[n - 1] == L'\n')
203204876Sache		buf[n - 1] = L'\0';
204204876Sache
205204876Sache	/* If requested get the chosen fields + character offsets. */
206204876Sache	if (numfields || numchars) {
207204876Sache		if ((ret = wcsdup(skip(buf))) == NULL)
208204876Sache			err(1, "wcsdup");
209204876Sache		free(buf);
210204876Sache	} else
211204876Sache		ret = buf;
212204876Sache
213204876Sache	if (iflag) {
214204876Sache		for (p = ret; *p != L'\0'; p++)
215204876Sache			*p = towlower(*p);
216176119Sghelmer	}
21798545Stjr
218204876Sache	return (ret);
21998545Stjr}
22098545Stjr
/*
 * inlcmp --
 *	Byte-wise comparison of two lines in which a newline at the first
 *	point of difference is treated as the end of the string, so a line
 *	with a trailing newline equals the same line without one.
 *
 *	Returns 0 when the lines are equal, otherwise the difference of the
 *	first mismatching bytes taken as unsigned char.
 */
int
inlcmp(const char *s1, const char *s2)
{
	int a, b;

	for (; *s1 == *s2; s1++, s2++) {
		if (*s1 == '\0')
			return (0);
	}

	a = (unsigned char)*s1;
	b = (unsigned char)*s2;
	/* The last line may not end with \n. */
	if (a == '\n')
		a = '\0';
	if (b == '\n')
		b = '\0';
	return (a - b);
}
238204803Sache
2391590Srgrimes/*
2401590Srgrimes * show --
2411590Srgrimes *	Output a line depending on the flags and number of repetitions
2421590Srgrimes *	of the line.
2431590Srgrimes */
2441590Srgrimesvoid
245204803Sacheshow(FILE *ofp, const char *str)
2461590Srgrimes{
24723690Speter
248135214Stjr	if (cflag)
249204876Sache		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
25028503Scharnier	if ((dflag && repeats) || (uflag && !repeats))
251204876Sache		(void)fprintf(ofp, "%s", str);
2521590Srgrimes}
2531590Srgrimes
254131502Stjrwchar_t *
255131502Stjrskip(wchar_t *str)
2561590Srgrimes{
257102944Sdwmalone	int nchars, nfields;
2581590Srgrimes
259204803Sache	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
260131502Stjr		while (iswblank(*str))
26198547Stjr			str++;
262204803Sache		while (*str != L'\0' && !iswblank(*str))
26398547Stjr			str++;
26498547Stjr	}
265204803Sache	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
266204803Sache		;
2671590Srgrimes	return(str);
2681590Srgrimes}
2691590Srgrimes
/*
 * file --
 *	Open `name' with fopen() in the given `mode'.  Returns the stream;
 *	on failure prints a diagnostic naming the file and exits with
 *	status 1, so the result is never NULL.
 */
FILE *
file(const char *name, const char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
2791590Srgrimes
/*
 * obsolete --
 *	Rewrite old-style numeric options in place before getopt() runs:
 *	"-N" becomes "-fN" and "+N" becomes "-sN".  Scanning starts at
 *	argv[1] and stops at the first argument that begins with "--" or
 *	with neither '-' nor '+'.  Replacement strings are malloc'ed and
 *	are not freed here.
 */
void
obsolete(char *argv[])
{
	char *arg, *start;
	int len;

	for (argv++; (arg = *argv) != NULL; argv++) {
		/* Stop at "--..." or at anything that is not an option. */
		if (arg[0] == '-') {
			if (arg[1] == '-')
				return;
		} else if (arg[0] != '+')
			return;

		/* Only a leading digit marks an old-style option. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/*
		 * Room for the dash, the new option letter and the
		 * argument text that followed the original sign.
		 */
		len = strlen(arg);
		start = malloc(len + 3);
		if (start == NULL)
			err(1, "malloc");
		start[0] = '-';
		start[1] = (arg[0] == '+') ? 's' : 'f';
		(void)strcpy(start + 2, arg + 1);
		*argv = start;
	}
}
3081590Srgrimes
/*
 * usage --
 *	Print the command synopsis on standard error and exit with
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fputs(
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n",
	    stderr);
	exit(1);
}
316