1/* $NetBSD: uniq.c,v 1.22 2019/04/23 17:35:10 christos Exp $ */ 2 3/* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Case Larsen. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36#ifndef lint 37__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39#endif /* not lint */ 40 41#ifndef lint 42#if 0 43static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 44#endif 45__RCSID("$NetBSD: uniq.c,v 1.22 2019/04/23 17:35:10 christos Exp $"); 46#endif /* not lint */ 47 48#include <err.h> 49#include <errno.h> 50#include <stdio.h> 51#include <ctype.h> 52#include <stdlib.h> 53#include <string.h> 54#include <unistd.h> 55 56static int cflag, dflag, uflag; 57static int numchars, numfields, repeats; 58 59static FILE *file(const char *, const char *); 60static void show(FILE *, const char *); 61static const char *skip(const char *, size_t *); 62static void obsolete(char *[]); 63static void usage(void) __dead; 64 65int 66main (int argc, char *argv[]) 67{ 68 const char *prevp, *thisp; 69 FILE *ifp, *ofp; 70 int ch; 71 char *prevline, *thisline, *p; 72 size_t prevlinesize, thislinesize, psize; 73 size_t prevlen, thislen; 74 75 setprogname(argv[0]); 76 ifp = ofp = NULL; 77 obsolete(argv); 78 while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) 79 switch (ch) { 80 case 'c': 81 cflag = 1; 82 break; 83 case 'd': 84 dflag = 1; 85 break; 86 case 'f': 87 numfields = strtol(optarg, &p, 10); 88 if (numfields < 0 || *p) 89 errx(1, "illegal field skip value: %s", optarg); 90 break; 91 case 's': 92 numchars = strtol(optarg, &p, 10); 93 if (numchars < 0 || *p) 94 errx(1, "illegal character skip value: %s", 95 optarg); 96 break; 97 case 'u': 98 uflag = 1; 99 break; 100 case '?': 101 default: 102 usage(); 103 } 104 105 argc -= optind; 106 argv +=optind; 107 108 switch(argc) { 109 case 0: 110 ifp = stdin; 111 ofp = stdout; 112 break; 113 case 1: 114 ifp = file(argv[0], "r"); 115 ofp = stdout; 116 break; 117 case 2: 118 ifp = file(argv[0], "r"); 119 ofp = file(argv[1], "w"); 120 break; 121 default: 122 usage(); 123 } 124 125 if ((p = fgetln(ifp, &psize)) == NULL) 126 return 0; 127 prevlinesize = prevlen = psize; 128 if ((prevline = malloc(prevlinesize + 1)) == NULL) 129 err(1, "malloc"); 130 (void)memcpy(prevline, p, prevlinesize); 131 prevline[prevlinesize] = '\0'; 132 133 if (numfields || numchars) 134 prevp = skip(prevline, &prevlen); 135 else 136 prevp = prevline; 137 138 thislinesize = psize; 139 if ((thisline = malloc(thislinesize + 1)) == NULL) 140 err(1, "malloc"); 141 142 while ((p = fgetln(ifp, &psize)) != NULL) { 143 if (psize > thislinesize) { 144 if ((thisline = realloc(thisline, psize + 1)) == NULL) 145 err(1, "realloc"); 146 thislinesize = psize; 147 } 148 thislen = psize; 149 (void)memcpy(thisline, p, psize); 150 thisline[psize] = '\0'; 151 152 /* If requested get the chosen fields + character offsets. */ 153 if (numfields || numchars) { 154 thisp = skip(thisline, &thislen); 155 } else { 156 thisp = thisline; 157 } 158 159 /* If different, print; set previous to new value. */ 160 if (thislen != prevlen || strcmp(thisp, prevp)) { 161 char *t; 162 size_t ts; 163 164 show(ofp, prevline); 165 t = prevline; 166 prevline = thisline; 167 thisline = t; 168 ts = prevlinesize; 169 prevlinesize = thislinesize; 170 thislinesize = ts; 171 prevp = thisp; 172 prevlen = thislen; 173 repeats = 0; 174 } else 175 ++repeats; 176 } 177 show(ofp, prevline); 178 free(prevline); 179 free(thisline); 180 return 0; 181} 182 183/* 184 * show -- 185 * Output a line depending on the flags and number of repetitions 186 * of the line. 187 */ 188static void 189show(FILE *ofp, const char *str) 190{ 191 192 if ((dflag && repeats == 0) || (uflag && repeats > 0)) 193 return; 194 if (cflag) { 195 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 196 } else { 197 (void)fprintf(ofp, "%s", str); 198 } 199} 200 201static const char * 202skip(const char *str, size_t *linesize) 203{ 204 int infield, nchars, nfields; 205 size_t ls = *linesize; 206 207 for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls) 208 if (isspace((unsigned char)*str)) { 209 if (infield) { 210 infield = 0; 211 --nfields; 212 } 213 } else if (!infield) 214 infield = 1; 215 for (nchars = numchars; nchars-- && *str; ++str, --ls) 216 continue; 217 *linesize = ls; 218 return str; 219} 220 221static FILE * 222file(const char *name, const char *mode) 223{ 224 FILE *fp; 225 226 if ((fp = fopen(name, mode)) == NULL) 227 err(1, "%s", name); 228 return(fp); 229} 230 231static void 232obsolete(char *argv[]) 233{ 234 char *ap, *p, *start; 235 236 while ((ap = *++argv) != NULL) { 237 /* Return if "--" or not an option of any form. */ 238 if (ap[0] != '-') { 239 if (ap[0] != '+') 240 return; 241 } else if (ap[1] == '-') 242 return; 243 if (!isdigit((unsigned char)ap[1])) 244 continue; 245 /* 246 * Digit signifies an old-style option. Malloc space for dash, 247 * new option and argument. 248 */ 249 (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); 250 if (!p) 251 err(1, "malloc"); 252 start = p; 253 *argv = start; 254 } 255} 256 257static void 258usage(void) 259{ 260 (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " 261 "[input_file [output_file]]\n", getprogname()); 262 exit(1); 263} 264