1206156Sume/* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */ 2206156Sume 3206156Sume/*- 4206156Sume * Copyright (c) 2007 The NetBSD Foundation, Inc. 5206156Sume * All rights reserved. 6206156Sume * 7206156Sume * This code is derived from software contributed to The NetBSD Foundation 8206156Sume * by Christos Zoulas. 9206156Sume * 10206156Sume * Redistribution and use in source and binary forms, with or without 11206156Sume * modification, are permitted provided that the following conditions 12206156Sume * are met: 13206156Sume * 1. Redistributions of source code must retain the above copyright 14206156Sume * notice, this list of conditions and the following disclaimer. 15206156Sume * 2. Redistributions in binary form must reproduce the above copyright 16206156Sume * notice, this list of conditions and the following disclaimer in the 17206156Sume * documentation and/or other materials provided with the distribution. 18206156Sume * 19206156Sume * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20206156Sume * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21206156Sume * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22206156Sume * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23206156Sume * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24206156Sume * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25206156Sume * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26206156Sume * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27206156Sume * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28206156Sume * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29206156Sume * POSSIBILITY OF SUCH DAMAGE. 
30206156Sume */ 31206156Sume#include <sys/cdefs.h> 32206156Sume__FBSDID("$FreeBSD$"); 33206156Sume 34206156Sume#include <stdio.h> 35206156Sume#include <string.h> 36206156Sume#include <stdlib.h> 37206156Sume#include <db.h> 38206156Sume#include <err.h> 39206156Sume#include <libutil.h> 40206156Sume#include <ctype.h> 41206156Sume#include <fcntl.h> 42206156Sume 43206156Sumeextern const HASHINFO hinfo; 44206156Sume 45206156Sumevoid uniq(const char *); 46206156Sumestatic int comp(const char *, char **, size_t *); 47206156Sume 48206156Sume/* 49206156Sume * Preserve only unique content lines in a file. Input lines that have 50206156Sume * content [alphanumeric characters before a comment] are white-space 51206156Sume * normalized and have their comments removed. Then they are placed 52206156Sume * in a hash table, and only the first instance of them is printed. 53206156Sume * Comment lines without any alphanumeric content are always printed 54206156Sume * since they are there to make the file "pretty". Comment lines with 55206156Sume * alphanumeric content are also placed into the hash table and only 56206156Sume * printed once. 
57206156Sume */ 58206156Sumevoid 59206156Sumeuniq(const char *fname) 60206156Sume{ 61206156Sume DB *db; 62206156Sume DBT key; 63206156Sume static const DBT data = { NULL, 0 }; 64206156Sume FILE *fp; 65206156Sume char *line; 66206156Sume size_t len; 67206156Sume 68206156Sume if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 69206156Sume err(1, "Cannot create in memory database"); 70206156Sume 71206156Sume if ((fp = fopen(fname, "r")) == NULL) 72206156Sume err(1, "Cannot open `%s'", fname); 73206156Sume while ((line = fgetln(fp, &len)) != NULL) { 74206156Sume size_t complen = len; 75206156Sume char *compline; 76206156Sume if (!comp(line, &compline, &complen)) { 77206156Sume (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 78206156Sume line); 79206156Sume continue; 80206156Sume } 81206156Sume key.data = compline; 82206156Sume key.size = complen; 83206156Sume switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 84206156Sume case 0: 85206156Sume (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 86206156Sume line); 87206156Sume break; 88206156Sume case 1: 89206156Sume break; 90206156Sume case -1: 91206156Sume err(1, "put"); 92206156Sume default: 93206156Sume abort(); 94206156Sume break; 95206156Sume } 96206156Sume } 97206156Sume (void)fflush(stdout); 98206156Sume exit(0); 99206156Sume} 100206156Sume 101206156Sume/* 102206156Sume * normalize whitespace in the original line and place a new string 103206156Sume * with whitespace converted to a single space in compline. If the line 104206156Sume * contains just comments, we preserve them. If it contains data and 105206156Sume * comments, we kill the comments. Return 1 if the line had actual 106206156Sume * contents, or 0 if it was just a comment without alphanumeric characters. 
/*
 * comp --
 *	Build the whitespace-normalized form of one input line.
 *
 * origline	line to examine; at most *len bytes are read and scanning
 *		also stops at a NUL byte, so the line need not be
 *		NUL-terminated.
 * compline	out: malloc(3)ed, NUL-terminated normalized copy that the
 *		caller owns and must free(3); set to NULL when the line is
 *		empty or consists of whitespace only.
 * len		in: number of bytes in origline; out: length of *compline
 *		(0 when *compline is NULL).
 *
 * Leading and trailing whitespace is dropped and every interior run of
 * whitespace becomes a single space.  A '#' that follows alphanumeric
 * data ends the line, discarding the trailing comment.  A '#' seen before
 * any alphanumeric character marks the line as a comment; from then on
 * everything is kept, later '#' characters included.
 *
 * Returns 1 if an alphanumeric character was kept, 0 otherwise.  Exits
 * through err(3) if memory cannot be allocated.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *src = (const unsigned char *)origline;
	unsigned char *dst;
	char *cline;
	size_t left = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Eat leading space */
	while (left != 0 && *src != '\0' && isspace(*src)) {
		src++;
		left--;
	}

	/*
	 * Nothing but whitespace: report it before allocating, so that no
	 * buffer is leaked and the caller never sees an unset pointer.
	 */
	if (left == 0 || *src == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* the normalized text is never longer than what is left */
	if ((cline = malloc(left + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", left + 1);

	dst = (unsigned char *)cline;
	for (; left != 0 && *src != '\0'; src++, left--) {
		if (isspace(*src)) {
			/* collapse a run of whitespace into one space */
			if (complen != 0 && isspace(dst[-1]))
				continue;
			*dst++ = ' ';
			complen++;
			continue;
		}
		if (!iscomment && *src == '#') {
			/* comment after real data: drop the rest */
			if (hasalnum)
				break;
			iscomment = 1;
		} else if (isalnum(*src))
			hasalnum = 1;
		*dst++ = *src;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && isspace(dst[-1])) {
		dst--;
		complen--;
	}
	*dst = '\0';

	*compline = cline;
	*len = complen;
	return hasalnum;
}