/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n"
" The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95";
#endif
static const char rcsid[] =
  "$FreeBSD$";
#endif /* not lint */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>

/* Option flags: -c (count), -d (repeated only), -u (unique only), -i. */
int cflag, dflag, uflag, iflag;
/*
 * numchars/numfields: leading characters/fields ignored when comparing
 * (-s / -f).  repeats: how many extra times the current line was seen.
 */
int numchars, numfields, repeats;

FILE	*file(const char *, const char *);
wchar_t	*convert(const char *);
int	 inlcmp(const char *, const char *);
void	 show(FILE *, const char *);
wchar_t	*skip(wchar_t *);
void	 obsolete(char *[]);
static int	 parse_skip(const char *, const char *);
static void	 usage(void);

/*
 * main --
 *	Read the input line by line, compare each line with the previous one
 *	and write the lines selected by the -c/-d/-u flags to the output.
 *	Exits 0 on success and 1 (through err/errx/usage) on any failure.
 */
int
main(int argc, char *argv[])
{
	wchar_t *tprev, *tthis;
	FILE *ifp, *ofp;
	int ch, comp;
	size_t prevbuflen, thisbuflen, b1;
	char *prevline, *thisline, *p;
	const char *ifn, *ofn;

	(void) setlocale(LC_ALL, "");

	/* Rewrite historic "-N" / "+N" arguments before getopt sees them. */
	obsolete(argv);
	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
		switch (ch) {
		case 'c':
			cflag = 1;
			break;
		case 'd':
			dflag = 1;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'f':
			numfields = parse_skip(optarg, "field");
			break;
		case 's':
			numchars = parse_skip(optarg, "character");
			break;
		case 'u':
			uflag = 1;
			break;
		case '?':
		default:
			usage();
		}

	argc -= optind;
	argv += optind;

	/* If no flags are set, default is -d -u. */
	if (cflag) {
		if (dflag || uflag)
			usage();
	} else if (!dflag && !uflag)
		dflag = uflag = 1;

	if (argc > 2)
		usage();

	ifp = stdin;
	ifn = "stdin";
	ofp = stdout;
	ofn = "stdout";
	if (argc > 0 && strcmp(argv[0], "-") != 0)
		ifp = file(ifn = argv[0], "r");
	if (argc > 1)
		ofp = file(ofn = argv[1], "w");

	prevbuflen = thisbuflen = 0;
	prevline = thisline = NULL;

	/* Empty input is not an error: there is simply nothing to print. */
	if (getline(&prevline, &prevbuflen, ifp) < 0) {
		if (ferror(ifp))
			err(1, "%s", ifn);
		exit(0);
	}
	tprev = convert(prevline);

	/* In the default mode each new line is printed as soon as it is seen. */
	if (!cflag && uflag && dflag)
		show(ofp, prevline);

	tthis = NULL;
	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
		free(tthis);
		tthis = convert(thisline);

		/*
		 * convert() returns NULL when a line is not a valid multibyte
		 * string.  Two such lines are compared byte-wise; a valid and
		 * an invalid line are always treated as different.
		 */
		if (tthis == NULL && tprev == NULL)
			comp = inlcmp(thisline, prevline);
		else if (tthis == NULL || tprev == NULL)
			comp = 1;
		else
			comp = wcscoll(tthis, tprev);

		if (comp) {
			/* If different, print; set previous to new value. */
			if (cflag || !dflag || !uflag)
				show(ofp, prevline);
			/* Swap the two getline() buffers so both are reused. */
			p = prevline;
			b1 = prevbuflen;
			prevline = thisline;
			prevbuflen = thisbuflen;
			free(tprev);
			tprev = tthis;
			if (!cflag && uflag && dflag)
				show(ofp, prevline);
			thisline = p;
			thisbuflen = b1;
			tthis = NULL;
			repeats = 0;
		} else
			++repeats;
	}
	if (ferror(ifp))
		err(1, "%s", ifn);
	if (cflag || !dflag || !uflag)
		show(ofp, prevline);

	/*
	 * Output is buffered, so a failed write may only surface here.
	 * errno is not reliable for an error recorded by an earlier call,
	 * hence errx() for the sticky indicator and err() for fclose().
	 */
	if (ferror(ofp))
		errx(1, "%s: write error", ofn);
	if (fclose(ofp) != 0)
		err(1, "%s", ofn);
	exit(0);
}

/*
 * parse_skip --
 *	Parse the argument of -f or -s as a non-negative decimal int.
 *	`what' names the option in the diagnostic.  Exits through errx() when
 *	the argument is empty, has trailing garbage, is negative or does not
 *	fit in an int.
 */
static int
parse_skip(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 0 || val > INT_MAX)
		errx(1, "illegal %s skip value: %s", what, arg);
	return ((int)val);
}

/*
 * convert --
 *	Build the wide-character comparison key for an input line: drop the
 *	trailing newline, skip the fields/characters requested with -f/-s and
 *	fold to lower case when -i is set.  Returns a malloc'ed string the
 *	caller must free, or NULL when the line cannot be converted with
 *	mbstowcs().  Allocation failures terminate the program.
 */
wchar_t *
convert(const char *str)
{
	size_t n;
	wchar_t *buf, *ret, *p;

	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
		return (NULL);
	if (SIZE_MAX / sizeof(*buf) < n + 1)
		errx(1, "conversion buffer length overflow");
	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
		err(1, "malloc");
	if (mbstowcs(buf, str, n + 1) != n)
		errx(1, "internal mbstowcs() error");
	/* The last line may not end with \n. */
	if (n > 0 && buf[n - 1] == L'\n')
		buf[n - 1] = L'\0';

	/* If requested get the chosen fields + character offsets. */
	if (numfields || numchars) {
		if ((ret = wcsdup(skip(buf))) == NULL)
			err(1, "wcsdup");
		free(buf);
	} else
		ret = buf;

	if (iflag) {
		for (p = ret; *p != L'\0'; p++)
			*p = towlower(*p);
	}

	return (ret);
}

/*
 * inlcmp --
 *	Byte-wise comparison of two raw input lines in which a newline at the
 *	first differing position counts as the end of the string.  Returns 0
 *	when the lines are equal, otherwise the difference of the first
 *	mismatching bytes (taken as unsigned char).
 */
int
inlcmp(const char *s1, const char *s2)
{
	int c1, c2;

	while (*s1 == *s2++)
		if (*s1++ == '\0')
			return (0);
	c1 = (unsigned char)*s1;
	c2 = (unsigned char)*(s2 - 1);
	/* The last line may not end with \n. */
	if (c1 == '\n')
		c1 = '\0';
	if (c2 == '\n')
		c2 = '\0';
	return (c1 - c2);
}

/*
 * show --
 *	Output a line depending on the flags and number of repetitions
 *	of the line.  With -c the line is prefixed by its count.  A line
 *	that lacks its terminating newline (only possible for the last
 *	input line) gets one appended so the output stays line-oriented.
 */
void
show(FILE *ofp, const char *str)
{
	size_t len;
	int printed;

	printed = 0;
	if (cflag) {
		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
		printed = 1;
	}
	if ((dflag && repeats) || (uflag && !repeats)) {
		(void)fprintf(ofp, "%s", str);
		printed = 1;
	}
	if (!printed)
		return;
	len = strlen(str);
	if (len == 0 || str[len - 1] != '\n')
		(void)fputc('\n', ofp);
}

/*
 * skip --
 *	Return a pointer into str just past the first numfields
 *	blank-separated fields and then past numchars further characters,
 *	never moving beyond the terminating NUL.
 */
wchar_t *
skip(wchar_t *str)
{
	int nchars, nfields;

	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
		while (iswblank(*str))
			str++;
		while (*str != L'\0' && !iswblank(*str))
			str++;
	}
	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
		;
	return(str);
}

/*
 * file --
 *	fopen() wrapper that terminates the program with a diagnostic naming
 *	the file when it cannot be opened.
 */
FILE *
file(const char *name, const char *mode)
{
	FILE *fp;

	if ((fp = fopen(name, mode)) == NULL)
		err(1, "%s", name);
	return(fp);
}

/*
 * obsolete --
 *	Rewrite old-style arguments in place: "-N" becomes "-fN" and "+N"
 *	becomes "-sN".  Scanning stops at "--" or at the first argument that
 *	starts with neither '-' nor '+'.  The replacement strings are
 *	malloc'ed and stored in argv; this function never frees them.
 */
void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}

/*
 * usage --
 *	Print the synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
	exit(1);
}