1226031Sstas/*	$NetBSD: ldif-filter.c,v 1.3 2021/08/14 16:15:03 christos Exp $	*/
2226031Sstas
3226031Sstas/* ldif-filter -- clean up LDIF testdata from stdin */
4226031Sstas/* $OpenLDAP$ */
5226031Sstas/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
6226031Sstas *
7226031Sstas * Copyright 2009-2021 The OpenLDAP Foundation.
8226031Sstas * All rights reserved.
9226031Sstas *
10226031Sstas * Redistribution and use in source and binary forms, with or without
11226031Sstas * modification, are permitted only as authorized by the OpenLDAP
12226031Sstas * Public License.
13226031Sstas *
14226031Sstas * A copy of this license is available in file LICENSE in the
15226031Sstas * top-level directory of the distribution or, alternatively, at
16226031Sstas * <http://www.OpenLDAP.org/license.html>.
17226031Sstas */
18226031Sstas
19226031Sstas#include <sys/cdefs.h>
20226031Sstas__RCSID("$NetBSD: ldif-filter.c,v 1.3 2021/08/14 16:15:03 christos Exp $");
21226031Sstas
22226031Sstas#include "portable.h"
23226031Sstas
24226031Sstas#include <stdio.h>
25226031Sstas#include <ac/ctype.h>
26226031Sstas#include <ac/stdlib.h>
27226031Sstas#include <ac/string.h>
28226031Sstas#include <ac/unistd.h>
29226031Sstas#ifdef _WIN32
30226031Sstas#include <fcntl.h>
31226031Sstas#endif
32226031Sstas
/* Specs consulted when no spec given on the command line applies */
#define DEFAULT_SPECS "ndb=a,null=n"

/* Growable buffer of chars: <len> elements used, <alloc> allocated */
typedef struct {
	char   *val;
	size_t  len;
	size_t  alloc;
} String;

/* Growable array of String: <len> elements used, <alloc> allocated */
typedef struct {
	String *val;
	size_t  len;
	size_t  alloc;
} Strings;

/*
 * Flags and corresponding program options.
 * DUMMY_FLAG has no option letter; get_flags() always sets it so that
 * a matched spec yields a nonzero result even with no options.
 */
enum {
	SORT_ATTRS   = 1,
	SORT_ENTRIES = 2,
	NO_OUTPUT    = 4,
	DUMMY_FLAG   = 8
};

/* Option letters: option index = log2(enum flag) */
static const char spec_options[] = "aen";

/* Name used in diagnostics; main() replaces it with argv[0]'s basename */
static const char *progname = "ldif-filter";

/* Empty String, appended to a Strings array to start a new element */
static const String null_string = { NULL, 0, 0 };
44226031Sstas
45226031Sstasstatic void
46226031Sstasusage( void )
47226031Sstas{
48226031Sstas	fprintf( stderr, "\
49226031SstasUsage: %s [-b backend] [-s spec[,spec]...]\n\
50226031SstasFilter standard input by first <spec> matching '[<backend>]=[a][e][n]':\n\
51226031Sstas  - Remove LDIF comments.\n\
52226031Sstas  - 'a': Sort attributes in entries.\n\
53226031Sstas  - 'e': Sort any entries separated by just one empty line.\n\
54226031Sstas  - 'n': Output nothing.\n\
55226031Sstas<backend> defaults to the $BACKEND environment variable.\n\
56226031SstasUse specs '%s' if no spec on the command line applies.\n",
57226031Sstas		progname, DEFAULT_SPECS );
58226031Sstas	exit( EXIT_FAILURE );
59226031Sstas}
60226031Sstas
61226031Sstas/* Return flags from "backend=flags" in spec; nonzero if backend found */
62226031Sstasstatic unsigned
63226031Sstasget_flags( const char *backend, const char *spec )
64226031Sstas{
65226031Sstas	size_t len = strlen( backend );
66226031Sstas	unsigned flags = DUMMY_FLAG;
67226031Sstas	const char *end, *tmp;
68226031Sstas
69226031Sstas	for ( ;; spec = end + ( *end != '\0' )) {
70226031Sstas		if ( !*spec )
71226031Sstas			return 0;
72226031Sstas		end = spec + strcspn( spec, "," );
73226031Sstas		if ( !(tmp = memchr( spec, '=', end-spec )))
74226031Sstas			break;
75226031Sstas		if ( tmp-spec == len && !memcmp( spec, backend, len )) {
76226031Sstas			spec = tmp+1;
77226031Sstas			break;
78226031Sstas		}
79226031Sstas	}
80226031Sstas
81226031Sstas	for ( ; spec < end; spec++ ) {
82226031Sstas		if ( (tmp = strchr( spec_options, *spec )) == NULL ) {
83226031Sstas			usage();
84226031Sstas		}
85226031Sstas		flags |= 1U << (tmp - spec_options);
86226031Sstas	}
87226031Sstas	return flags;
88226031Sstas}
89226031Sstas
/*
 * Append <count> elements from <data> to the growable array <s>
 * (a pointer to String or Strings).
 *
 * The array is enlarged with xrealloc() whenever alloc <= len + count;
 * the new allocation is 2*alloc + (count|7) + 1 elements.  When
 * <isString> is nonzero one extra element (the terminator following
 * <data>) is copied too but not counted in len, so <data> must then
 * provide count+1 elements.  Arguments are evaluated more than once.
 */
#define APPEND(s /* String or Strings */, data, count, isString) do { \
	const size_t app_used_ = (s)->len; \
	const size_t app_elem_ = sizeof *(s)->val; \
	if ( (s)->alloc <= app_used_ + (count) ) { \
		(s)->alloc = 2 * (s)->alloc + ((count)|7) + 1; \
		(s)->val   = xrealloc( (s)->val, app_elem_ * (s)->alloc ); \
	} \
	memcpy( (s)->val + app_used_, data, \
		app_elem_ * ((count) + !!(isString)) ); \
	(s)->len = app_used_ + (count); \
} while (0)
99226031Sstas
100226031Sstasstatic void *
101226031Sstasxrealloc( void *ptr, size_t len )
102226031Sstas{
103226031Sstas	if ( (ptr = realloc( ptr, len )) == NULL ) {
104226031Sstas		perror( progname );
105226031Sstas		exit( EXIT_FAILURE );
106226031Sstas	}
107226031Sstas	return ptr;
108226031Sstas}
109226031Sstas
110226031Sstasstatic int
111226031Sstascmp( const void *s, const void *t )
112226031Sstas{
113226031Sstas	return strcmp( ((const String *) s)->val, ((const String *) t)->val );
114226031Sstas}
115226031Sstas
116226031Sstasstatic void
117226031Sstassort_strings( Strings *ss, size_t offset )
118226031Sstas{
119226031Sstas	qsort( ss->val + offset, ss->len - offset, sizeof(*ss->val), cmp );
120226031Sstas}
121226031Sstas
122226031Sstas/* Build entry ss[n] from attrs ss[n...], and free the attrs */
123226031Sstasstatic void
124226031Sstasbuild_entry( Strings *ss, size_t n, unsigned flags, size_t new_len )
125226031Sstas{
126226031Sstas	String *vals = ss->val, *e = &vals[n];
127226031Sstas	size_t end = ss->len;
128226031Sstas	char *ptr;
129226031Sstas
130226031Sstas	if ( flags & SORT_ATTRS ) {
131226031Sstas		sort_strings( ss, n + 1 );
132226031Sstas	}
133226031Sstas	e->val = xrealloc( e->val, e->alloc = new_len + 1 );
134226031Sstas	ptr = e->val + e->len;
135226031Sstas	e->len = new_len;
136226031Sstas	ss->len = ++n;
137226031Sstas	for ( ; n < end; free( vals[n++].val )) {
138226031Sstas		ptr = strcpy( ptr, vals[n].val ) + vals[n].len;
139226031Sstas	}
140226031Sstas	assert( ptr == e->val + new_len );
141226031Sstas}
142226031Sstas
143226031Sstas/* Flush entries to stdout and free them */
144226031Sstasstatic void
145226031Sstasflush_entries( Strings *ss, const char *sep, unsigned flags )
146226031Sstas{
147226031Sstas	size_t i, end = ss->len;
148226031Sstas	const char *prefix = "";
149226031Sstas
150226031Sstas	if ( flags & SORT_ENTRIES ) {
151226031Sstas		sort_strings( ss, 0 );
152226031Sstas	}
153226031Sstas	for ( i = 0; i < end; i++, prefix = sep ) {
154226031Sstas		if ( printf( "%s%s", prefix, ss->val[i].val ) < 0 ) {
155226031Sstas			perror( progname );
156226031Sstas			exit( EXIT_FAILURE );
157226031Sstas		}
158226031Sstas		free( ss->val[i].val );
159226031Sstas	}
160226031Sstas	ss->len = 0;
161226031Sstas}
162226031Sstas
163226031Sstasstatic void
164226031Sstasfilter_stdin( unsigned flags )
165226031Sstas{
166226031Sstas	char line[256];
167226031Sstas	Strings ss = { NULL, 0, 0 };	/* entries + attrs of partial entry */
168226031Sstas	size_t entries = 0, attrs_totlen = 0, line_len;
169226031Sstas	const char *entry_sep = "\n", *sep = "";
170226031Sstas	int comment = 0, eof = 0, eol, prev_eol = 1;	/* flags */
171226031Sstas	String *s;
172226031Sstas
173226031Sstas	/* LDIF = Entries ss[..entries-1] + sep + attrs ss[entries..] + line */
174226031Sstas	for ( ; !eof || ss.len || *sep; prev_eol = eol ) {
175226031Sstas		if ( eof || (eof = !fgets( line, sizeof(line), stdin ))) {
176226031Sstas			strcpy( line, prev_eol ? "" : *sep ? sep : "\n" );
177226031Sstas		}
178226031Sstas		line_len = strlen( line );
179226031Sstas		eol = (line_len == 0 || line[line_len - 1] == '\n');
180226031Sstas
181226031Sstas		if ( *line == ' ' ) {		/* continuation line? */
182226031Sstas			prev_eol = 0;
183226031Sstas		} else if ( prev_eol ) {	/* start of logical line? */
184226031Sstas			comment = (*line == '#');
185226031Sstas		}
186226031Sstas		if ( comment || (flags & NO_OUTPUT) ) {
187226031Sstas			continue;
188226031Sstas		}
189226031Sstas
190226031Sstas		/* Collect attrs for partial entry in ss[entries...] */
191226031Sstas		if ( !prev_eol && attrs_totlen != 0 ) {
192226031Sstas			goto grow_attr;
193226031Sstas		} else if ( line_len > (*line == '\r' ? 2 : 1) ) {
194226031Sstas			APPEND( &ss, &null_string, 1, 0 ); /* new attr */
195226031Sstas		grow_attr:
196226031Sstas			s = &ss.val[ss.len - 1];
197226031Sstas			APPEND( s, line, line_len, 1 ); /* strcat to attr */
198226031Sstas			attrs_totlen += line_len;
199226031Sstas			continue;
200226031Sstas		}
201226031Sstas
202226031Sstas		/* Empty line - consume sep+attrs or entries+sep */
203226031Sstas		if ( attrs_totlen != 0 ) {
204226031Sstas			entry_sep = sep;
205226031Sstas			if ( entries == 0 )
206226031Sstas				fputs( sep, stdout );
207226031Sstas			build_entry( &ss, entries++, flags, attrs_totlen );
208226031Sstas			attrs_totlen = 0;
209226031Sstas		} else {
210226031Sstas			flush_entries( &ss, entry_sep, flags );
211226031Sstas			fputs( sep, stdout );
212226031Sstas			entries = 0;
213226031Sstas		}
214226031Sstas		sep = "\r\n" + 2 - line_len;	/* sep = copy(line) */
215226031Sstas	}
216226031Sstas
217226031Sstas	free( ss.val );
218226031Sstas}
219226031Sstas
220226031Sstasint
221226031Sstasmain( int argc, char **argv )
222226031Sstas{
223226031Sstas	const char *backend = getenv( "BACKEND" ), *specs = "", *tmp;
224226031Sstas	unsigned flags;
225226031Sstas	int i;
226226031Sstas
227226031Sstas	if ( argc > 0 ) {
228226031Sstas		progname = (tmp = strrchr( argv[0], '/' )) ? tmp+1 : argv[0];
229226031Sstas	}
230226031Sstas
231226031Sstas	while ( (i = getopt( argc, argv, "b:s:" )) != EOF ) {
232226031Sstas		switch ( i ) {
233226031Sstas		case 'b':
234226031Sstas			backend = optarg;
235226031Sstas			break;
236226031Sstas		case 's':
237226031Sstas			specs = optarg;
238226031Sstas			break;
239226031Sstas		default:
240226031Sstas			usage();
241226031Sstas		}
242226031Sstas	}
243226031Sstas	if ( optind < argc ) {
244226031Sstas		usage();
245226031Sstas	}
246226031Sstas	if ( backend == NULL ) {
247226031Sstas		backend = "";
248226031Sstas	}
249226031Sstas
250226031Sstas#ifdef _WIN32
251226031Sstas	_setmode(1, _O_BINARY);	/* don't convert \n to \r\n on stdout */
252226031Sstas#endif
253226031Sstas	flags = get_flags( backend, specs );
254226031Sstas	filter_stdin( flags ? flags : get_flags( backend, DEFAULT_SPECS ));
255226031Sstas	if ( fclose( stdout ) == EOF ) {
256226031Sstas		perror( progname );
257226031Sstas		return EXIT_FAILURE;
258226031Sstas	}
259226031Sstas
260226031Sstas	return EXIT_SUCCESS;
261226031Sstas}
262226031Sstas