/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
321590Srgrimes
/*
 * Identification strings embedded in the object file.  They are compiled
 * out when `lint' is defined; the historical SCCS id is kept for reference
 * only and is never compiled.
 */
#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1989, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
#endif
static const char rcsid[] =
    "$FreeBSD: releng/10.3/usr.bin/uniq/uniq.c 280250 2015-03-19 12:32:48Z rwatson $";
#endif /* not lint */
461590Srgrimes
47280250Srwatson#include <sys/capsicum.h>
48253457Spjd
4928503Scharnier#include <ctype.h>
5028503Scharnier#include <err.h>
51253457Spjd#include <errno.h>
5252455Sache#include <limits.h>
5334323Sache#include <locale.h>
54253457Spjd#include <nl_types.h>
55200633Sjh#include <stdint.h>
56204876Sache#define _WITH_GETLINE
571590Srgrimes#include <stdio.h>
581590Srgrimes#include <stdlib.h>
591590Srgrimes#include <string.h>
60253457Spjd#include <termios.h>
6123690Speter#include <unistd.h>
62131502Stjr#include <wchar.h>
63131502Stjr#include <wctype.h>
641590Srgrimes
/* Option flags; main() sets each one when the matching switch is seen. */
static int cflag;	/* -c */
static int dflag;	/* -d */
static int uflag;	/* -u */
static int iflag;	/* -i */

/* Values given with -s and -f; both are consulted by skip(). */
static int numchars;
static int numfields;

/*
 * Number of additional consecutive lines that compared equal to the
 * line currently held as "previous"; show() prints repeats + 1 for -c.
 */
static int repeats;

static wchar_t	*convert(const char *);
static FILE	*file(const char *, const char *);
static int	 inlcmp(const char *, const char *);
static void	 obsolete(char *[]);
static void	 show(FILE *, const char *);
static wchar_t	*skip(wchar_t *);
static void	 usage(void);
751590Srgrimes
/*
 * strerror_init --
 *	Open the "libc" message catalog so that NLS data is cached before
 *	the process enters capability mode.  The returned descriptor is
 *	deliberately discarded.
 *
 *	XXXPJD: There should be strerror_init() and strsignal_init() in libc.
 */
static void
strerror_init(void)
{
	(void)catopen("libc", NL_CAT_LOCALE);
}
86253457Spjd
871590Srgrimesint
88102944Sdwmalonemain (int argc, char *argv[])
891590Srgrimes{
90204876Sache	wchar_t *tprev, *tthis;
911590Srgrimes	FILE *ifp, *ofp;
92204803Sache	int ch, comp;
93204876Sache	size_t prevbuflen, thisbuflen, b1;
94204803Sache	char *prevline, *thisline, *p;
95131502Stjr	const char *ifn;
96253457Spjd	cap_rights_t rights;
971590Srgrimes
9895030Sache	(void) setlocale(LC_ALL, "");
9934323Sache
1001590Srgrimes	obsolete(argv);
10197527Stjr	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
1021590Srgrimes		switch (ch) {
1031590Srgrimes		case 'c':
1041590Srgrimes			cflag = 1;
1051590Srgrimes			break;
1061590Srgrimes		case 'd':
1071590Srgrimes			dflag = 1;
1081590Srgrimes			break;
10929207Sjoerg		case 'i':
11029207Sjoerg			iflag = 1;
11129207Sjoerg			break;
1121590Srgrimes		case 'f':
1131590Srgrimes			numfields = strtol(optarg, &p, 10);
1141590Srgrimes			if (numfields < 0 || *p)
11528503Scharnier				errx(1, "illegal field skip value: %s", optarg);
1161590Srgrimes			break;
1171590Srgrimes		case 's':
1181590Srgrimes			numchars = strtol(optarg, &p, 10);
1191590Srgrimes			if (numchars < 0 || *p)
12028503Scharnier				errx(1, "illegal character skip value: %s", optarg);
1211590Srgrimes			break;
1221590Srgrimes		case 'u':
1231590Srgrimes			uflag = 1;
1241590Srgrimes			break;
1251590Srgrimes		case '?':
1261590Srgrimes		default:
1271590Srgrimes			usage();
128169636Sjmallett		}
1291590Srgrimes
13097527Stjr	argc -= optind;
131169638Sjmallett	argv += optind;
1321590Srgrimes
1331590Srgrimes	/* If no flags are set, default is -d -u. */
1341590Srgrimes	if (cflag) {
1351590Srgrimes		if (dflag || uflag)
1361590Srgrimes			usage();
1371590Srgrimes	} else if (!dflag && !uflag)
1381590Srgrimes		dflag = uflag = 1;
1391590Srgrimes
14097529Stjr	if (argc > 2)
14197529Stjr		usage();
14297529Stjr
14397529Stjr	ifp = stdin;
144131502Stjr	ifn = "stdin";
14597529Stjr	ofp = stdout;
14697529Stjr	if (argc > 0 && strcmp(argv[0], "-") != 0)
147131502Stjr		ifp = file(ifn = argv[0], "r");
148255219Spjd	cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
149255219Spjd	if (cap_rights_limit(fileno(ifp), &rights) < 0 && errno != ENOSYS)
150253457Spjd		err(1, "unable to limit rights for %s", ifn);
151255219Spjd	cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
15297529Stjr	if (argc > 1)
1531590Srgrimes		ofp = file(argv[1], "w");
154253457Spjd	else
155255219Spjd		cap_rights_set(&rights, CAP_IOCTL);
156255219Spjd	if (cap_rights_limit(fileno(ofp), &rights) < 0 && errno != ENOSYS) {
157253457Spjd		err(1, "unable to limit rights for %s",
158253457Spjd		    argc > 1 ? argv[1] : "stdout");
159253457Spjd	}
160255219Spjd	if (cap_rights_is_set(&rights, CAP_IOCTL)) {
161253457Spjd		unsigned long cmd;
1621590Srgrimes
163253457Spjd		cmd = TIOCGETA; /* required by isatty(3) in printf(3) */
164253457Spjd
165253457Spjd		if (cap_ioctls_limit(fileno(ofp), &cmd, 1) < 0 &&
166253457Spjd		    errno != ENOSYS) {
167253457Spjd			err(1, "unable to limit ioctls for %s",
168253457Spjd			    argc > 1 ? argv[1] : "stdout");
169253457Spjd		}
170253457Spjd	}
171253457Spjd
172253457Spjd	strerror_init();
173253457Spjd	if (cap_enter() < 0 && errno != ENOSYS)
174253457Spjd		err(1, "unable to enter capability mode");
175253457Spjd
176204876Sache	prevbuflen = thisbuflen = 0;
177204876Sache	prevline = thisline = NULL;
1781590Srgrimes
179204876Sache	if (getline(&prevline, &prevbuflen, ifp) < 0) {
180169639Sjmallett		if (ferror(ifp))
181169638Sjmallett			err(1, "%s", ifn);
1821590Srgrimes		exit(0);
183131502Stjr	}
184204876Sache	tprev = convert(prevline);
185204803Sache
18699433Stjr	if (!cflag && uflag && dflag)
18799433Stjr		show(ofp, prevline);
1881590Srgrimes
189204876Sache	tthis = NULL;
190204876Sache	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
191204876Sache		if (tthis != NULL)
192204876Sache			free(tthis);
193204876Sache		tthis = convert(thisline);
1941590Srgrimes
195204803Sache		if (tthis == NULL && tprev == NULL)
196204876Sache			comp = inlcmp(thisline, prevline);
197204803Sache		else if (tthis == NULL || tprev == NULL)
198204803Sache			comp = 1;
19929207Sjoerg		else
200204803Sache			comp = wcscoll(tthis, tprev);
20129207Sjoerg
20229207Sjoerg		if (comp) {
203204803Sache			/* If different, print; set previous to new value. */
20499433Stjr			if (cflag || !dflag || !uflag)
20599433Stjr				show(ofp, prevline);
206204803Sache			p = prevline;
207204876Sache			b1 = prevbuflen;
2081590Srgrimes			prevline = thisline;
209204876Sache			prevbuflen = thisbuflen;
210204876Sache			if (tprev != NULL)
211204876Sache				free(tprev);
212204803Sache			tprev = tthis;
21399433Stjr			if (!cflag && uflag && dflag)
21499433Stjr				show(ofp, prevline);
215204803Sache			thisline = p;
216204876Sache			thisbuflen = b1;
217204876Sache			tthis = NULL;
2181590Srgrimes			repeats = 0;
2191590Srgrimes		} else
2201590Srgrimes			++repeats;
2211590Srgrimes	}
222131502Stjr	if (ferror(ifp))
223169638Sjmallett		err(1, "%s", ifn);
22499433Stjr	if (cflag || !dflag || !uflag)
22599433Stjr		show(ofp, prevline);
2261590Srgrimes	exit(0);
2271590Srgrimes}
2281590Srgrimes
229227193Sedstatic wchar_t *
230204876Sacheconvert(const char *str)
23198545Stjr{
232204876Sache	size_t n;
233204876Sache	wchar_t *buf, *ret, *p;
23498545Stjr
235204876Sache	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
236204876Sache		return (NULL);
237204927Sache	if (SIZE_MAX / sizeof(*buf) < n + 1)
238204927Sache		errx(1, "conversion buffer length overflow");
239204876Sache	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
240204876Sache		err(1, "malloc");
241204876Sache	if (mbstowcs(buf, str, n + 1) != n)
242204876Sache		errx(1, "internal mbstowcs() error");
243204876Sache	/* The last line may not end with \n. */
244204876Sache	if (n > 0 && buf[n - 1] == L'\n')
245204876Sache		buf[n - 1] = L'\0';
246204876Sache
247204876Sache	/* If requested get the chosen fields + character offsets. */
248204876Sache	if (numfields || numchars) {
249204876Sache		if ((ret = wcsdup(skip(buf))) == NULL)
250204876Sache			err(1, "wcsdup");
251204876Sache		free(buf);
252204876Sache	} else
253204876Sache		ret = buf;
254204876Sache
255204876Sache	if (iflag) {
256204876Sache		for (p = ret; *p != L'\0'; p++)
257204876Sache			*p = towlower(*p);
258176119Sghelmer	}
25998545Stjr
260204876Sache	return (ret);
26198545Stjr}
26298545Stjr
/*
 * inlcmp --
 *	Byte-wise comparison of two lines in which a newline at the first
 *	point of difference is treated as the end of the string, so a line
 *	with a trailing newline equals the same line without one.
 *
 *	Returns 0 when the lines match, otherwise the difference of the
 *	first mismatching bytes taken as unsigned char values.
 */
static int
inlcmp(const char *s1, const char *s2)
{
	size_t i;
	int a, b;

	for (i = 0; s1[i] == s2[i]; i++) {
		if (s1[i] == '\0')
			return (0);
	}

	a = (unsigned char)s1[i];
	b = (unsigned char)s2[i];
	/* The last line may not end with \n. */
	if (a == '\n')
		a = '\0';
	if (b == '\n')
		b = '\0';
	return (a - b);
}
280204803Sache
2811590Srgrimes/*
2821590Srgrimes * show --
2831590Srgrimes *	Output a line depending on the flags and number of repetitions
2841590Srgrimes *	of the line.
2851590Srgrimes */
286227193Sedstatic void
287204803Sacheshow(FILE *ofp, const char *str)
2881590Srgrimes{
28923690Speter
290135214Stjr	if (cflag)
291204876Sache		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
29228503Scharnier	if ((dflag && repeats) || (uflag && !repeats))
293204876Sache		(void)fprintf(ofp, "%s", str);
2941590Srgrimes}
2951590Srgrimes
296227193Sedstatic wchar_t *
297131502Stjrskip(wchar_t *str)
2981590Srgrimes{
299102944Sdwmalone	int nchars, nfields;
3001590Srgrimes
301204803Sache	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
302131502Stjr		while (iswblank(*str))
30398547Stjr			str++;
304204803Sache		while (*str != L'\0' && !iswblank(*str))
30598547Stjr			str++;
30698547Stjr	}
307204803Sache	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
308204803Sache		;
3091590Srgrimes	return(str);
3101590Srgrimes}
3111590Srgrimes
/*
 * file --
 *	fopen(3) wrapper that never returns NULL: when `name' cannot be
 *	opened in `mode' the program terminates through err(3) with
 *	status 1, naming the file.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
3211590Srgrimes
/*
 * obsolete --
 *	Rewrite old-style numeric options in place so getopt(3) can parse
 *	them: "-N" becomes "-fN" and "+N" becomes "-sN".
 *
 *	Scanning starts at argv[1] and stops at the end of the vector, at
 *	"--", or at the first argument that begins with neither '-' nor
 *	'+'.  Rewritten arguments are replaced by freshly malloc'ed strings
 *	that are never freed.  Allocation failure is fatal.
 */
static void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  The replacement is
		 * a dash, the new option letter, and the original text
		 * after its first character: len + 1 characters plus the
		 * terminating NUL.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 2)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		/* Copies the len - 1 remaining characters and the NUL. */
		(void)memcpy(p, ap + 1, len);
		*argv = start;
	}
}
3501590Srgrimes
/*
 * usage --
 *	Print the command synopsis on standard error and exit with
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
358