1/*-
2 * Copyright (c) 2006, Maxime Henrion <mux@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/types.h>
30
31#include <assert.h>
32#include <regex.h>
33#include <stdlib.h>
34
35#include "fnmatch.h"
36#include "globtree.h"
37#include "misc.h"
38
39/*
40 * The "GlobTree" interface allows one to construct arbitrarily complex
41 * boolean expressions for evaluating whether to accept or reject a
42 * filename.  The globtree_test() function returns true or false
43 * according to whether the name is accepted or rejected by the
44 * expression.
45 *
46 * Expressions are trees constructed from nodes representing either
47 * primitive matching operations (primaries) or operators that are
48 * applied to their subexpressions.  The simplest primitives are
49 * globtree_false(), which matches nothing, and globtree_true(), which
50 * matches everything.
51 *
52 * A more useful primitive is the matching operation, constructed with
53 * globtree_match().  It will call fnmatch() with the suppliedi
54 * shell-style pattern to determine if the filename matches.
55 *
56 * Expressions can be combined with the boolean operators AND, OR, and
57 * NOT, to form more complex expressions.
58 */
59
60/* Node types. */
61#define	GLOBTREE_NOT		0
62#define	GLOBTREE_AND		1
63#define	GLOBTREE_OR		2
64#define	GLOBTREE_MATCH		3
65#define	GLOBTREE_REGEX		4
66#define	GLOBTREE_TRUE		5
67#define	GLOBTREE_FALSE		6
68
69/* A node. */
70struct globtree {
71	int type;
72	struct globtree *left;
73	struct globtree *right;
74
75	/* The "data" field points to the text pattern for GLOBTREE_MATCH
76	   nodes, and to the regex_t for GLOBTREE_REGEX nodes. For any
77	   other node, it is set to NULL. */
78	void *data;
79	/* The "flags" field contains the flags to pass to fnmatch() for
80	   GLOBTREE_MATCH nodes. */
81	int flags;
82};
83
84static struct globtree	*globtree_new(int);
85static int		 globtree_eval(struct globtree *, const char *);
86
87static struct globtree *
88globtree_new(int type)
89{
90	struct globtree *gt;
91
92	gt = xmalloc(sizeof(struct globtree));
93	gt->type = type;
94	gt->data = NULL;
95	gt->flags = 0;
96	gt->left = NULL;
97	gt->right = NULL;
98	return (gt);
99}
100
101struct globtree *
102globtree_true(void)
103{
104	struct globtree *gt;
105
106	gt = globtree_new(GLOBTREE_TRUE);
107	return (gt);
108}
109
110struct globtree *
111globtree_false(void)
112{
113	struct globtree *gt;
114
115	gt = globtree_new(GLOBTREE_FALSE);
116	return (gt);
117}
118
119struct globtree *
120globtree_match(const char *pattern, int flags)
121{
122	struct globtree *gt;
123
124	gt = globtree_new(GLOBTREE_MATCH);
125	gt->data = xstrdup(pattern);
126	gt->flags = flags;
127	return (gt);
128}
129
130struct globtree *
131globtree_regex(const char *pattern)
132{
133	struct globtree *gt;
134	int error;
135
136	gt = globtree_new(GLOBTREE_REGEX);
137	gt->data = xmalloc(sizeof(regex_t));
138	error = regcomp(gt->data, pattern, REG_NOSUB);
139	assert(!error);
140	return (gt);
141}
142
143struct globtree *
144globtree_and(struct globtree *left, struct globtree *right)
145{
146	struct globtree *gt;
147
148	if (left->type == GLOBTREE_FALSE || right->type == GLOBTREE_FALSE) {
149		globtree_free(left);
150		globtree_free(right);
151		gt = globtree_false();
152		return (gt);
153	}
154	if (left->type == GLOBTREE_TRUE) {
155		globtree_free(left);
156		return (right);
157	}
158	if (right->type == GLOBTREE_TRUE) {
159		globtree_free(right);
160		return (left);
161	}
162	gt = globtree_new(GLOBTREE_AND);
163	gt->left = left;
164	gt->right = right;
165	return (gt);
166}
167
168struct globtree *
169globtree_or(struct globtree *left, struct globtree *right)
170{
171	struct globtree *gt;
172
173	if (left->type == GLOBTREE_TRUE || right->type == GLOBTREE_TRUE) {
174		globtree_free(left);
175		globtree_free(right);
176		gt = globtree_true();
177		return (gt);
178	}
179	if (left->type == GLOBTREE_FALSE) {
180		globtree_free(left);
181		return (right);
182	}
183	if (right->type == GLOBTREE_FALSE) {
184		globtree_free(right);
185		return (left);
186	}
187	gt = globtree_new(GLOBTREE_OR);
188	gt->left = left;
189	gt->right = right;
190	return (gt);
191}
192
193struct globtree *
194globtree_not(struct globtree *child)
195{
196	struct globtree *gt;
197
198	if (child->type == GLOBTREE_TRUE) {
199		globtree_free(child);
200		gt = globtree_new(GLOBTREE_FALSE);
201		return (gt);
202	}
203	if (child->type == GLOBTREE_FALSE) {
204		globtree_free(child);
205		gt = globtree_new(GLOBTREE_TRUE);
206		return (gt);
207	}
208	gt = globtree_new(GLOBTREE_NOT);
209	gt->left = child;
210	return (gt);
211}
212
213/* Evaluate one node (must be a leaf node). */
214static int
215globtree_eval(struct globtree *gt, const char *path)
216{
217	int rv;
218
219	switch (gt->type) {
220	case GLOBTREE_TRUE:
221		return (1);
222	case GLOBTREE_FALSE:
223		return (0);
224	case GLOBTREE_MATCH:
225		assert(gt->data != NULL);
226		rv = fnmatch(gt->data, path, gt->flags);
227		if (rv == 0)
228			return (1);
229		assert(rv == FNM_NOMATCH);
230		return (0);
231	case GLOBTREE_REGEX:
232		assert(gt->data != NULL);
233		rv = regexec(gt->data, path, 0, NULL, 0);
234		if (rv == 0)
235			return (1);
236		assert(rv == REG_NOMATCH);
237		return (0);
238	}
239
240	assert(0);
241	return (-1);
242}
243
244/* Small stack API to walk the tree iteratively. */
245typedef enum {
246	STATE_DOINGLEFT,
247	STATE_DOINGRIGHT
248} walkstate_t;
249
250struct stack {
251	struct stackelem *stack;
252	size_t size;
253	size_t in;
254};
255
256struct stackelem {
257	struct globtree *node;
258	walkstate_t state;
259};
260
261static void
262stack_init(struct stack *stack)
263{
264
265	stack->in = 0;
266	stack->size = 8;	/* Initial size. */
267	stack->stack = xmalloc(sizeof(struct stackelem) * stack->size);
268}
269
270static size_t
271stack_size(struct stack *stack)
272{
273
274	return (stack->in);
275}
276
277static void
278stack_push(struct stack *stack, struct globtree *node, walkstate_t state)
279{
280	struct stackelem *e;
281
282	if (stack->in == stack->size) {
283		stack->size *= 2;
284		stack->stack = xrealloc(stack->stack,
285		    sizeof(struct stackelem) * stack->size);
286	}
287	e = stack->stack + stack->in++;
288	e->node = node;
289	e->state = state;
290}
291
292static void
293stack_pop(struct stack *stack, struct globtree **node, walkstate_t *state)
294{
295	struct stackelem *e;
296
297	assert(stack->in > 0);
298	e = stack->stack + --stack->in;
299	*node = e->node;
300	*state = e->state;
301}
302
303static void
304stack_free(struct stack *s)
305{
306
307	free(s->stack);
308}
309
310/* Tests if the supplied filename matches. */
311int
312globtree_test(struct globtree *gt, const char *path)
313{
314	struct stack stack;
315	walkstate_t state;
316	int val;
317
318	stack_init(&stack);
319	for (;;) {
320doleft:
321		/* Descend to the left until we hit bottom. */
322		while (gt->left != NULL) {
323			stack_push(&stack, gt, STATE_DOINGLEFT);
324			gt = gt->left;
325		}
326
327		/* Now we're at a leaf node.  Evaluate it. */
328		val = globtree_eval(gt, path);
329		/* Ascend, propagating the value through operator nodes. */
330		for (;;) {
331			if (stack_size(&stack) == 0) {
332				stack_free(&stack);
333				return (val);
334			}
335			stack_pop(&stack, &gt, &state);
336			switch (gt->type) {
337			case GLOBTREE_NOT:
338				val = !val;
339				break;
340			case GLOBTREE_AND:
341				/* If we haven't yet evaluated the right subtree
342				   and the partial result is true, descend to
343				   the right.  Otherwise the result is already
344				   determined to be val. */
345				if (state == STATE_DOINGLEFT && val) {
346					stack_push(&stack, gt,
347					    STATE_DOINGRIGHT);
348					gt = gt->right;
349					goto doleft;
350				}
351				break;
352			case GLOBTREE_OR:
353				/* If we haven't yet evaluated the right subtree
354				   and the partial result is false, descend to
355				   the right.  Otherwise the result is already
356				   determined to be val. */
357				if (state == STATE_DOINGLEFT && !val) {
358					stack_push(&stack, gt,
359					    STATE_DOINGRIGHT);
360					gt = gt->right;
361					goto doleft;
362				}
363				break;
364			default:
365				/* We only push nodes that have children. */
366				assert(0);
367				return (-1);
368			}
369		}
370	}
371}
372
373/*
374 * We could de-recursify this function using a stack, but it would be
375 * overkill since it is never called from a thread context with a
376 * limited stack size nor used in a critical path, so I think we can
377 * afford keeping it recursive.
378 */
379void
380globtree_free(struct globtree *gt)
381{
382
383	if (gt->data != NULL) {
384		if (gt->type == GLOBTREE_REGEX)
385			regfree(gt->data);
386		free(gt->data);
387	}
388	if (gt->left != NULL)
389		globtree_free(gt->left);
390	if (gt->right != NULL)
391		globtree_free(gt->right);
392	free(gt);
393}
394