1/* uniq -- remove duplicate lines from a sorted file
2   Copyright (C) 86, 91, 1995-1998, 1999 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
18/* Written by Richard Stallman and David MacKenzie. */
19/* 2000-03-22  Trimmed down to the case of "uniq -u" by Bruno Haible. */
20
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
25
/* Name under which this program was invoked; used as the prefix of
   the diagnostics written to stderr.  Set once at the start of main.  */
static char *program_name;

/* Report on stderr that memory could not be obtained, then terminate
   the process with exit status 1.  This function does not return.  */
static void
xalloc_fail (void)
{
  enum { failure_status = 1 };

  fprintf (stderr, "%s: virtual memory exhausted\n", program_name);
  exit (failure_status);
}
35
/* Allocate N bytes of memory dynamically, with error checking.
   The result is never NULL: if the allocation fails, xalloc_fail is
   called, which prints a diagnostic and exits.

   A request for zero bytes is rounded up to one byte.  malloc (0) is
   permitted to return NULL even when memory is available, and that
   must not be mistaken for memory exhaustion.  */

void *
xmalloc (size_t n)
{
  void *p;

  if (n == 0)
    n = 1;

  p = malloc (n);
  if (p == NULL)
    xalloc_fail ();
  return p;
}
48
/* Change the size of an allocated block of memory P to N bytes,
   with error checking.  The result is never NULL: if the reallocation
   fails, xalloc_fail is called, which prints a diagnostic and exits.

   If P is NULL, a fresh block is allocated (realloc treats a null
   pointer like malloc).

   A request for zero bytes is rounded up to one byte.  realloc (p, 0)
   may free P and return NULL, which would otherwise be reported as
   memory exhaustion.  */

void *
xrealloc (void *p, size_t n)
{
  void *q;

  if (n == 0)
    n = 1;

  /* Keep the result separate from P so that the original pointer is
     not overwritten before the outcome is known.  */
  q = realloc (p, n);
  if (q == NULL)
    xalloc_fail ();
  return q;
}
61
/* One line of input text, kept in a dynamically allocated buffer
   that is enlarged on demand while a line is being read.  */

struct linebuffer
{
  size_t size;    /* Number of bytes allocated for BUFFER.  */
  size_t length;  /* Number of bytes of BUFFER holding the line.  */
  char *buffer;   /* The line's bytes; not null-terminated.  */
};
70
71/* Initialize linebuffer LINEBUFFER for use. */
72
73static void
74initbuffer (struct linebuffer *linebuffer)
75{
76  linebuffer->length = 0;
77  linebuffer->size = 200;
78  linebuffer->buffer = (char *) xmalloc (linebuffer->size);
79}
80
81/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
82   Keep the newline; append a newline if it's the last line of a file
83   that ends in a non-newline character.  Do not null terminate.
84   Return LINEBUFFER, except at end of file return 0.  */
85
86static struct linebuffer *
87readline (struct linebuffer *linebuffer, FILE *stream)
88{
89  int c;
90  char *buffer = linebuffer->buffer;
91  char *p = linebuffer->buffer;
92  char *end = buffer + linebuffer->size - 1; /* Sentinel. */
93
94  if (feof (stream) || ferror (stream))
95    return 0;
96
97  do
98    {
99      c = getc (stream);
100      if (c == EOF)
101	{
102	  if (p == buffer)
103	    return 0;
104	  if (p[-1] == '\n')
105	    break;
106	  c = '\n';
107	}
108      if (p == end)
109	{
110	  linebuffer->size *= 2;
111	  buffer = (char *) xrealloc (buffer, linebuffer->size);
112	  p = p - linebuffer->buffer + buffer;
113	  linebuffer->buffer = buffer;
114	  end = buffer + linebuffer->size - 1;
115	}
116      *p++ = c;
117    }
118  while (c != '\n');
119
120  linebuffer->length = p - buffer;
121  return linebuffer;
122}
123
124/* Free linebuffer LINEBUFFER's data. */
125
126static void
127freebuffer (struct linebuffer *linebuffer)
128{
129  free (linebuffer->buffer);
130}
131
/* Undefine, to avoid warning about redefinition on some systems.  */
#undef min
#define min(x, y) ((x) < (y) ? (x) : (y))

/* Return zero if two strings OLD and NEW match, nonzero if not.
   OLD and NEW point not to the beginnings of the lines
   but rather to the beginnings of the fields to compare.
   OLDLEN and NEWLEN are their lengths.

   When the strings differ within their common prefix, the result is
   that of memcmp.  When one is a proper prefix of the other, the
   result is negative if OLD is shorter and positive if it is longer.
   The lengths are compared directly rather than subtracted: narrowing
   a size_t difference to int could yield zero for strings of unequal
   length.  */

static int
different (const char *old, const char *new, size_t oldlen, size_t newlen)
{
  int order;

  order = memcmp (old, new, min (oldlen, newlen));
  if (order != 0)
    return order;

  /* Common prefix is identical, so only the lengths can differ.  */
  return (oldlen > newlen) - (oldlen < newlen);
}
152
153/* Output the line in linebuffer LINE to stream STREAM
154   provided that the switches say it should be output.
155   If requested, print the number of times it occurred, as well;
156   LINECOUNT + 1 is the number of times that the line occurred. */
157
158static void
159writeline (const struct linebuffer *line, FILE *stream, int linecount)
160{
161  if (linecount == 0)
162    fwrite (line->buffer, 1, line->length, stream);
163}
164
165/* Process input file INFILE with output to OUTFILE.
166   If either is "-", use the standard I/O stream for it instead. */
167
168static void
169check_file (const char *infile, const char *outfile)
170{
171  FILE *istream;
172  FILE *ostream;
173  struct linebuffer lb1, lb2;
174  struct linebuffer *thisline, *prevline, *exch;
175  char *prevfield, *thisfield;
176  size_t prevlen, thislen;
177  int match_count = 0;
178
179  if (!strcmp (infile, "-"))
180    istream = stdin;
181  else
182    istream = fopen (infile, "r");
183  if (istream == NULL)
184    {
185      fprintf (stderr, "%s: error opening %s\n", program_name, infile);
186      exit (1);
187    }
188
189  if (!strcmp (outfile, "-"))
190    ostream = stdout;
191  else
192    ostream = fopen (outfile, "w");
193  if (ostream == NULL)
194    {
195      fprintf (stderr, "%s: error opening %s\n", program_name, outfile);
196      exit (1);
197    }
198
199  thisline = &lb1;
200  prevline = &lb2;
201
202  initbuffer (thisline);
203  initbuffer (prevline);
204
205  if (readline (prevline, istream) == 0)
206    goto closefiles;
207  prevfield = prevline->buffer;
208  prevlen = prevline->length;
209
210  while (!feof (istream))
211    {
212      int match;
213      if (readline (thisline, istream) == 0)
214	break;
215      thisfield = thisline->buffer;
216      thislen = thisline->length;
217      match = !different (thisfield, prevfield, thislen, prevlen);
218
219      if (match)
220	++match_count;
221
222      if (!match)
223	{
224	  writeline (prevline, ostream, match_count);
225	  exch = prevline;
226	  prevline = thisline;
227	  thisline = exch;
228	  prevfield = thisfield;
229	  prevlen = thislen;
230	  if (!match)
231	    match_count = 0;
232	}
233    }
234
235  writeline (prevline, ostream, match_count);
236
237 closefiles:
238  if (ferror (istream) || fclose (istream) == EOF)
239    {
240      fprintf (stderr, "%s: error reading %s\n", program_name, infile);
241      exit (1);
242    }
243
244  if (ferror (ostream) || fclose (ostream) == EOF)
245    {
246      fprintf (stderr, "%s: error writing %s\n", program_name, outfile);
247      exit (1);
248    }
249
250  freebuffer (&lb1);
251  freebuffer (&lb2);
252}
253
254int
255main (int argc, char **argv)
256{
257  const char *infile = "-";
258  const char *outfile = "-";
259  int optind = 1;
260
261  program_name = argv[0];
262
263  if (optind < argc)
264    infile = argv[optind++];
265
266  if (optind < argc)
267    outfile = argv[optind++];
268
269  if (optind < argc)
270    {
271      fprintf (stderr, "%s: too many arguments\n", program_name);
272      exit (1);
273    }
274
275  check_file (infile, outfile);
276
277  exit (0);
278}
279