1/* ldif-filter -- clean up LDIF testdata from stdin */
2/* $OpenLDAP$ */
3/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 *
5 * Copyright 2009-2011 The OpenLDAP Foundation.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
10 * Public License.
11 *
12 * A copy of this license is available in file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
15 */
16
17#include "portable.h"
18
19#include <stdio.h>
20#include <ac/ctype.h>
21#include <ac/stdlib.h>
22#include <ac/string.h>
23#include <ac/unistd.h>
24
/* Fallback spec list: main() consults it when get_flags() finds no match
 * for the backend in the specs given on the command line. */
#define DEFAULT_SPECS "ndb=a,null=n"
26
/*
 * Growable arrays, filled through the APPEND() macro:
 *   val   - heap buffer (NULL while nothing has been appended),
 *   len   - number of elements in use,
 *   alloc - number of elements allocated.
 * For a String built with APPEND(..., isString=1) the text is followed by
 * a '\0' which is not counted in len.
 */
typedef struct { char   *val; size_t len, alloc; } String;
typedef struct { String	*val; size_t len, alloc; } Strings;
29
/*
 * Flags and corresponding program options.
 * get_flags() sets bit number i for option letter spec_options[i], so the
 * order of the letters must follow the order of the enum values.
 * DUMMY_FLAG has no option letter; get_flags() always sets it so that a
 * matching spec with no option letters still yields a nonzero result.
 */
enum { SORT_ATTRS = 1, SORT_ENTRIES = 2, NO_OUTPUT = 4, DUMMY_FLAG = 8 };
static const char spec_options[] = "aen"; /* option index = log2(enum flag) */
33
/* Name used in usage and error messages; main() replaces it with the
 * last path component of argv[0] when argv[0] is available. */
static const char *progname = "ldif-filter";
/* Empty String, used as the initial value when appending a new attribute */
static const String null_string = { NULL, 0, 0 };
36
37static void
38usage( void )
39{
40	fprintf( stderr, "\
41Usage: %s [-b backend] [-s spec[,spec]...]\n\
42Filter standard input by first <spec> matching '[<backend>]=[a][e][n]':\n\
43  - Remove LDIF comments.\n\
44  - 'a': Sort attributes in entries.\n\
45  - 'e': Sort any entries separated by just one empty line.\n\
46  - 'n': Output nothing.\n\
47<backend> defaults to the $BACKEND environment variable.\n\
48Use specs '%s' if no spec on the command line applies.\n",
49		progname, DEFAULT_SPECS );
50	exit( EXIT_FAILURE );
51}
52
53/* Return flags from "backend=flags" in spec; nonzero if backend found */
54static unsigned
55get_flags( const char *backend, const char *spec )
56{
57	size_t len = strlen( backend );
58	unsigned flags = DUMMY_FLAG;
59	const char *tmp;
60
61	while ( '=' != *(spec += strncmp( spec, backend, len ) ? 0 : len) ) {
62		if ( (spec = strchr( spec, ',' )) == NULL ) {
63			return 0;
64		}
65		++spec;
66	}
67	while ( *++spec && *spec != ',' ) {
68		if ( (tmp = strchr( spec_options, *spec )) == NULL ) {
69			usage();
70		}
71		flags |= 1U << (tmp - spec_options);
72	}
73	return flags;
74}
75
/*
 * APPEND(s, data, count, isString): copy count elements from data to the
 * end of the growable array *s (a String or a Strings).
 * The buffer is enlarged with xrealloc() whenever the new length would not
 * leave at least one spare element.  If isString is nonzero, one extra
 * element following data (the string terminator) is copied as well, but it
 * is not counted in s->len.
 * Note that s and count are evaluated several times.
 */
#define APPEND(s, data, count, isString) do { \
	const size_t old_len_ = (s)->len, elem_size_ = sizeof *(s)->val; \
	size_t cap_ = (s)->alloc; \
	if ( old_len_ + (count) >= cap_ ) { \
		cap_ += cap_ + ((count)|7) + 1; \
		(s)->alloc = cap_; \
		(s)->val = xrealloc( (s)->val, elem_size_ * cap_ ); \
	} \
	memcpy( (s)->val + old_len_, (data), \
		elem_size_ * ((count) + ((isString) ? 1 : 0)) ); \
	(s)->len = old_len_ + (count); \
} while (0)
85
86static void *
87xrealloc( void *ptr, size_t len )
88{
89	if ( (ptr = realloc( ptr, len )) == NULL ) {
90		perror( progname );
91		exit( EXIT_FAILURE );
92	}
93	return ptr;
94}
95
96static int
97cmp( const void *s, const void *t )
98{
99	return strcmp( ((const String *) s)->val, ((const String *) t)->val );
100}
101
102static void
103sort_strings( Strings *ss, size_t offset )
104{
105	qsort( ss->val + offset, ss->len - offset, sizeof(*ss->val), cmp );
106}
107
108/* Build entry ss[n] from attrs ss[n...], and free the attrs */
109static void
110build_entry( Strings *ss, size_t n, unsigned flags, size_t new_len )
111{
112	String *vals = ss->val, *e = &vals[n];
113	size_t end = ss->len;
114	char *ptr;
115
116	if ( flags & SORT_ATTRS ) {
117		sort_strings( ss, n + 1 );
118	}
119	e->val = xrealloc( e->val, e->alloc = new_len + 1 );
120	ptr = e->val + e->len;
121	e->len = new_len;
122	ss->len = ++n;
123	for ( ; n < end; free( vals[n++].val )) {
124		ptr = strcpy( ptr, vals[n].val ) + vals[n].len;
125	}
126	assert( ptr == e->val + new_len );
127}
128
129/* Flush entries to stdout and free them */
130static void
131flush_entries( Strings *ss, const char *sep, unsigned flags )
132{
133	size_t i, end = ss->len;
134	const char *prefix = "";
135
136	if ( flags & SORT_ENTRIES ) {
137		sort_strings( ss, 0 );
138	}
139	for ( i = 0; i < end; i++, prefix = sep ) {
140		if ( printf( "%s%s", prefix, ss->val[i].val ) < 0 ) {
141			perror( progname );
142			exit( EXIT_FAILURE );
143		}
144		free( ss->val[i].val );
145	}
146	ss->len = 0;
147}
148
/*
 * Copy LDIF from stdin to stdout according to flags.
 *
 * Input is read with fgets() in chunks of at most sizeof(line)-1 chars, so
 * one physical line may arrive in several chunks.  Logical lines starting
 * with '#' are dropped, including their continuation lines.  Other
 * non-empty lines are collected as attrs; an empty line turns the collected
 * attrs into an entry via build_entry().  Entries are kept in ss until an
 * empty line arrives with no attrs pending, and are then written by
 * flush_entries().  With NO_OUTPUT set, all input is read and discarded.
 */
static void
filter_stdin( unsigned flags )
{
	char line[256];			/* current input chunk */
	Strings ss = { NULL, 0, 0 };	/* entries + attrs of partial entry */
	/* entries: number of finished entries at the front of ss;
	 * attrs_totlen: total length of the attrs collected after them */
	size_t entries = 0, attrs_totlen = 0, line_len;
	/* sep: the last empty line seen ("", "\n" or "\r\n"), not yet output;
	 * entry_sep: separator handed to flush_entries() */
	const char *entry_sep = "\n", *sep = "";
	/* comment: inside a comment line; eol/prev_eol: whether the current/
	 * previous chunk ended a line (prev_eol is also cleared for a chunk
	 * that starts with a space) */
	int comment = 0, eof = 0, eol, prev_eol = 1;	/* flags */
	String *s;

	/* LDIF = Entries ss[..entries-1] + sep + attrs ss[entries..] + line */
	/* After EOF, keep iterating until ss is empty and sep is "" */
	for ( ; !eof || ss.len || *sep; prev_eol = eol ) {
		if ( eof || (eof = !fgets( line, sizeof(line), stdin ))) {
			/* At EOF, substitute a line: an empty string if the
			 * previous chunk ended a line, otherwise a line end
			 * (sep if one was seen, else "\n") */
			strcpy( line, prev_eol ? "" : *sep ? sep : "\n" );
		}
		line_len = strlen( line );
		eol = (line_len == 0 || line[line_len - 1] == '\n');

		if ( *line == ' ' ) {		/* continuation line? */
			prev_eol = 0;		/* handle like the rest of a line */
		} else if ( prev_eol ) {	/* start of logical line? */
			comment = (*line == '#');
		}
		if ( comment || (flags & NO_OUTPUT) ) {
			continue;
		}

		/* Collect attrs for partial entry in ss[entries...] */
		if ( !prev_eol && attrs_totlen != 0 ) {
			/* more text for the attr that is being collected */
			goto grow_attr;
		} else if ( line_len > (*line == '\r' ? 2 : 1) ) {
			/* longer than a bare line end: a non-empty line */
			APPEND( &ss, &null_string, 1, 0 ); /* new attr */
		grow_attr:
			s = &ss.val[ss.len - 1];
			APPEND( s, line, line_len, 1 ); /* strcat to attr */
			attrs_totlen += line_len;
			continue;
		}

		/* Empty line - consume sep+attrs or entries+sep */
		if ( attrs_totlen != 0 ) {
			/* End of an entry: merge its attrs into ss[entries] */
			entry_sep = sep;
			if ( entries == 0 )
				fputs( sep, stdout );
			build_entry( &ss, entries++, flags, attrs_totlen );
			attrs_totlen = 0;
		} else {
			/* No attrs pending: write out the collected entries */
			flush_entries( &ss, entry_sep, flags );
			fputs( sep, stdout );
			entries = 0;
		}
		/* line_len is at most 2 here, so this points at the last
		 * line_len chars of "\r\n": "", "\n" or "\r\n".
		 * NOTE(review): this assumes the short line is a line end;
		 * its actual characters are not checked. */
		sep = "\r\n" + 2 - line_len;	/* sep = copy(line) */
	}

	free( ss.val );
}
205
206int
207main( int argc, char **argv )
208{
209	const char *backend = getenv( "BACKEND" ), *specs = "", *tmp;
210	unsigned flags;
211	int i;
212
213	if ( argc > 0 ) {
214		progname = (tmp = strrchr( argv[0], '/' )) ? tmp+1 : argv[0];
215	}
216
217	while ( (i = getopt( argc, argv, "b:s:" )) != EOF ) {
218		switch ( i ) {
219		case 'b':
220			backend = optarg;
221			break;
222		case 's':
223			specs = optarg;
224			break;
225		default:
226			usage();
227		}
228	}
229	if ( optind < argc ) {
230		usage();
231	}
232	if ( backend == NULL ) {
233		backend = "";
234	}
235
236	flags = get_flags( backend, specs );
237	filter_stdin( flags ? flags : get_flags( backend, DEFAULT_SPECS ));
238	if ( fclose( stdout ) == EOF ) {
239		perror( progname );
240		return EXIT_FAILURE;
241	}
242
243	return EXIT_SUCCESS;
244}
245