index.c revision 1.4
1/*	$OpenBSD: index.c,v 1.4 2010/06/11 08:45:06 martinh Exp $ */
2
3/*
4 * Copyright (c) 2009 Martin Hedenfalk <martin@bzero.se>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/* Indices are stored as unique keys in a btree. No data is stored.
20 * The keys are made up of the attribute being indexed, concatenated
21 * with the distinguished name of the entry. The namespace suffix is
22 * stripped, however.
23 *
24 * Below, the namespace suffix dc=example,dc=com is stripped.
25 *
26 * Index b-tree sorts with plain strcmp:
27 * ...
28 * cn=Chunky Bacon,cn=chunky bacon,ou=people,
29 * cn=Chunky Bacon,uid=cbacon,ou=accounts,
30 * cn=Chunky Beans,cn=chunky beans,ou=people,
31 * cn=Chunky Beans,uid=cbeans,ou=accounts,
32 * cn=Crispy Bacon,cn=crispy bacon,ou=people,
33 * ...
34 * sn=Bacon,cn=chunky bacon,ou=people,
35 * sn=Bacon,cn=crispy bacon,ou=people,
36 * sn=Beans,cn=chunky beans,ou=people,
37 * ...
38 * This index can be used for equality, prefix and range searches.
39 *
40 * If an indexed attribute sorts numerically (integer), we might be able
41 * to just pad it with zeros... otherwise this needs to be refined.
42 *
43 * Multiple attributes can be indexed in the same database.
44 *
45 * Presence index can be stored as:
46 * !mail,cn=chunky bacon,ou=people,
47 * !mail,cn=chunky beans,ou=people,
48 * !mail,cn=crispy bacon,ou=people,
49 *
50 * Substring index could probably be stored like a suffix tree:
51 * sn>Bacon,cn=chunky bacon,ou=people,
52 * sn>acon,cn=chunky bacon,ou=people,
53 * sn>con,cn=chunky bacon,ou=people,
54 * sn>on,cn=chunky bacon,ou=people,
55 * sn>n,cn=chunky bacon,ou=people,
56 *
57 * This facilitates both substring and suffix search.
58 *
59 * Approximate index:
60 * sn~[soundex(bacon)],cn=chunky bacon,ou=people,
61 *
62 * One level searches can be indexed as follows:
63 * @<parent-rdn>,<rdn>
64 * example:
65 * @ou=accounts,uid=cbacon
66 * @ou=accounts,uid=cbeans
67 * @ou=people,cn=chunky bacon
68 * @ou=people,cn=chunky beans
69 * @ou=people,cn=crispy bacon
70 *
71 */
72
73#include <sys/types.h>
74#include <sys/queue.h>
75
76#include <assert.h>
77#include <stdlib.h>
78#include <string.h>
79
80#include "ldapd.h"
81
82static void	 continue_indexer(int fd, short why, void *arg);
83static void	 stop_indexer(struct ctl_conn *c);
84
85int
86index_attribute(struct namespace *ns, char *attr, struct btval *dn,
87    struct ber_element *a)
88{
89	int			 dnsz, rc;
90	char			*s, *t;
91	struct ber_element	*v;
92	struct btval		 key, val;
93
94	assert(ns);
95	assert(ns->indx_txn);
96	assert(attr);
97	assert(dn);
98	assert(a);
99	assert(a->be_next);
100	bzero(&val, sizeof(val));
101
102	log_debug("indexing %.*s on %s", (int)dn->size, (char *)dn->data, attr);
103
104	dnsz = dn->size - strlen(ns->suffix);
105
106	for (v = a->be_next->be_sub; v; v = v->be_next) {
107		if (ber_get_string(v, &s) != 0)
108			continue;
109		bzero(&key, sizeof(key));
110		key.size = asprintf(&t, "%s=%s,%.*s", attr, s, dnsz,
111		    (char *)dn->data);
112		key.data = t;
113		normalize_dn(key.data);
114		rc = btree_txn_put(NULL, ns->indx_txn, &key, &val,
115		    BT_NOOVERWRITE);
116		free(t);
117		if (rc == BT_FAIL)
118			return -1;
119	}
120
121	return 0;
122}
123
124static int
125index_rdn(struct namespace *ns, struct btval *dn)
126{
127	int		 dnsz, rdnsz, pdnsz, rc;
128	char		*t, *parent_dn;
129	struct btval	 key, val;
130
131	bzero(&val, sizeof(val));
132
133	assert(ns);
134	assert(ns->indx_txn);
135	assert(dn);
136
137	dnsz = dn->size - strlen(ns->suffix);
138	if (dnsz-- == 0)
139		return 0;
140
141	parent_dn = memchr(dn->data, ',', dnsz);
142	if (parent_dn == NULL) {
143		rdnsz = dnsz;
144		pdnsz = 0;
145	} else {
146		rdnsz = parent_dn - (char *)dn->data;
147		pdnsz = dnsz - rdnsz - 1;
148		++parent_dn;
149	}
150
151	key.size = asprintf(&t, "@%.*s,%.*s", pdnsz, parent_dn,
152	    rdnsz, (char *)dn->data);
153	key.data = t;
154	log_debug("indexing rdn on %.*s", (int)key.size, (char *)key.data);
155	normalize_dn(key.data);
156	rc = btree_txn_put(NULL, ns->indx_txn, &key, &val, BT_NOOVERWRITE);
157	free(t);
158	if (rc == BT_FAIL)
159		return -1;
160	return 0;
161}
162
163int
164unindex_attribute(struct namespace *ns, char *attr, struct btval *dn,
165    struct ber_element *a)
166{
167	int			 dnsz, rc;
168	char			*s, *t;
169	struct ber_element	*v;
170	struct btval		 key;
171
172	assert(ns);
173	assert(ns->indx_txn);
174	assert(attr);
175	assert(dn);
176	assert(a);
177	assert(a->be_next);
178
179	log_debug("unindexing %.*s on %s",
180	    (int)dn->size, (char *)dn->data, attr);
181
182	dnsz = dn->size - strlen(ns->suffix);
183
184	for (v = a->be_next->be_sub; v; v = v->be_next) {
185		if (ber_get_string(v, &s) != 0)
186			continue;
187		bzero(&key, sizeof(key));
188		key.size = asprintf(&t, "%s=%s,%.*s", attr, s, dnsz,
189		    (char *)dn->data);
190		key.data = t;
191		normalize_dn(key.data);
192		rc = btree_txn_del(NULL, ns->indx_txn, &key, NULL);
193		free(t);
194		if (rc == BT_FAIL)
195			return -1;
196	}
197
198	return 0;
199}
200
201int
202index_entry(struct namespace *ns, struct btval *dn, struct ber_element *elm)
203{
204	struct ber_element	*a;
205	struct attr_index	*ai;
206
207	assert(ns);
208	assert(dn);
209	assert(elm);
210	TAILQ_FOREACH(ai, &ns->indices, next) {
211		a = ldap_get_attribute(elm, ai->attr);
212		if (a && index_attribute(ns, ai->attr, dn, a) < 0)
213			return -1;
214	}
215
216	return index_rdn(ns, dn);
217}
218
219static int
220unindex_rdn(struct namespace *ns, struct btval *dn)
221{
222	int		 dnsz, rdnsz, rc;
223	char		*t, *parent_dn;
224	struct btval	 key, val;
225
226	bzero(&val, sizeof(val));
227
228	assert(ns);
229	assert(ns->indx_txn);
230	assert(dn);
231
232	dnsz = dn->size - strlen(ns->suffix);
233
234	parent_dn = memchr(dn->data, ',', dn->size);
235	if (parent_dn++ == NULL)
236		parent_dn = (char *)dn->data + dn->size;
237	rdnsz = parent_dn - (char *)dn->data;
238
239	key.size = asprintf(&t, "@%.*s,%.*s", (dnsz - rdnsz), parent_dn,
240	    rdnsz, (char *)dn->data);
241	key.data = t;
242	log_debug("unindexing rdn on %.*s", (int)key.size, (char *)key.data);
243	normalize_dn(key.data);
244	rc = btree_txn_del(NULL, ns->indx_txn, &key, NULL);
245	free(t);
246	if (rc == BT_FAIL)
247		return -1;
248	return 0;
249}
250
251int
252unindex_entry(struct namespace *ns, struct btval *dn, struct ber_element *elm)
253{
254	struct ber_element	*a;
255	struct attr_index	*ai;
256
257	assert(ns);
258	assert(dn);
259	assert(elm);
260	TAILQ_FOREACH(ai, &ns->indices, next) {
261		a = ldap_get_attribute(elm, ai->attr);
262		if (a && unindex_attribute(ns, ai->attr, dn, a) < 0)
263			return -1;
264	}
265
266	return unindex_rdn(ns, dn);
267}
268
269/* Reconstruct the full dn from the index key and the namespace suffix.
270 *
271 * Examples:
272 *
273 * index key:
274 *   sn=Bacon,cn=chunky bacon,ou=people,
275 * full dn:
276 *   cn=chunky bacon,ou=people,dc=example,dc=com
277 *
278 * index key:
279 *   @ou=people,cn=chunky bacon
280 * full dn:
281 *   cn=chunky bacon,ou=people,dc=example,dc=com
282 *
283 * index key:
284 *   @,ou=people
285 * full dn:
286 *   ou=people,dc=example,dc=com
287 *
288 * index key:
289 *   @ou=staff,ou=people,cn=chunky bacon
290 * full dn:
291 *   cn=chunky bacon,ou=staff,ou=people,dc=example,dc=com
292 *
293 * Filled in dn must be freed with btval_reset().
294 */
295int
296index_to_dn(struct namespace *ns, struct btval *indx, struct btval *dn)
297{
298	char		*rdn, *parent_rdn, indxtype, *dst;
299	int		 rdn_sz, prdn_sz;
300
301	/* Skip past first index part to get the RDN.
302	 */
303
304	indxtype = ((char *)indx->data)[0];
305	if (indxtype == '@') {
306		/* One-level index.
307		 * Full DN is made up of rdn + parent rdn + namespace suffix.
308		 */
309		rdn = memrchr(indx->data, ',', indx->size);
310		if (rdn++ == NULL)
311			return -1;
312		rdn_sz = indx->size - (rdn - (char *)indx->data);
313
314		parent_rdn = (char *)indx->data + 1;
315		prdn_sz = rdn - parent_rdn - 1;
316		dn->size = indx->size + strlen(ns->suffix);
317		if (prdn_sz == 0)
318			dn->size--;
319		if ((dn->data = malloc(dn->size)) == NULL) {
320			log_warn("conn_search: malloc");
321			return -1;
322		}
323		dst = dn->data;
324		bcopy(rdn, dst, rdn_sz);
325		dst += rdn_sz;
326		*dst++ = ',';
327		bcopy(parent_rdn, dst, prdn_sz);
328		dst += prdn_sz;
329		if (prdn_sz > 0)
330			*dst++ = ',';
331		bcopy(ns->suffix, dst, strlen(ns->suffix));
332	} else {
333		/* Construct the full DN by appending namespace suffix.
334		 */
335		rdn = memchr(indx->data, ',', indx->size);
336		if (rdn++ == NULL)
337			return -1;
338		rdn_sz = indx->size - (rdn - (char *)indx->data);
339		dn->size = rdn_sz + strlen(ns->suffix);
340		if ((dn->data = malloc(dn->size)) == NULL) {
341			log_warn("index_to_dn: malloc");
342			return -1;
343		}
344		bcopy(rdn, dn->data, rdn_sz);
345		bcopy(ns->suffix, (char *)dn->data + rdn_sz,
346		    strlen(ns->suffix));
347	}
348
349	dn->free_data = 1;
350
351	return 0;
352}
353
354/* Return the next namespace that isn't already being indexed or compacted.
355 */
356static struct namespace *
357next_namespace(struct namespace *ns)
358{
359	if (ns == NULL)
360		ns = TAILQ_FIRST(&conf->namespaces);
361	else
362		ns = TAILQ_NEXT(ns, next);
363
364	do {
365		if (ns == NULL || (!ns->indexing && !ns->compacting))
366			break;
367	} while ((ns = TAILQ_NEXT(ns, next)) != NULL);
368
369	return ns;
370}
371
372static void
373continue_indexer(int fd, short why, void *arg)
374{
375	struct ctl_conn		*c = arg;
376	struct ber_element	*elm;
377	struct btval		 key, val;
378	struct timeval		 tv;
379	int			 i, rc = BT_FAIL;
380
381	if (c->cursor == NULL) {
382		log_info("begin indexing namespace %s", c->ns->suffix);
383		c->ncomplete = 0;
384		c->ns->indexing = 1;
385		c->cursor = btree_cursor_open(c->ns->data_db);
386		if (c->cursor == NULL) {
387			log_warn("failed to open cursor");
388			goto fail;
389		}
390	}
391
392	bzero(&key, sizeof(key));
393	bzero(&val, sizeof(val));
394
395	tv.tv_sec = 0;
396	tv.tv_usec = 0;
397
398	if (namespace_begin(c->ns) != 0) {
399		tv.tv_usec = 50000;
400		evtimer_add(&c->ev, &tv);
401		return;
402	}
403
404	for (i = 0; i < 40; i++) {
405		rc = btree_cursor_get(c->cursor, &key, &val, BT_NEXT);
406		if (rc != BT_SUCCESS)
407			break;
408		if ((elm = namespace_db2ber(c->ns, &val)) == NULL)
409			continue;
410		rc = index_entry(c->ns, &key, elm);
411		ber_free_elements(elm);
412		btval_reset(&key);
413		btval_reset(&val);
414		if (rc != 0)
415			goto fail;
416		++c->ncomplete;
417	}
418
419	if (namespace_commit(c->ns) != 0)
420		goto fail;
421
422	control_report_indexer(c, 0);
423
424	if (rc == BT_NOTFOUND) {
425		log_info("done indexing namespace %s", c->ns->suffix);
426		btree_cursor_close(c->cursor);
427		c->cursor = NULL;
428		c->ns->indexing = 0;
429		control_report_indexer(c, 1);
430
431		if (c->all)
432			c->ns = next_namespace(c->ns);
433		else
434			c->ns = NULL;
435
436		if (c->ns == NULL) {
437			log_info("done indexing all namespaces");
438			return;
439		}
440	} else if (rc != BT_SUCCESS)
441		goto fail;
442
443	evtimer_add(&c->ev, &tv);
444	return;
445
446fail:
447	if (c->ns != NULL)
448		control_report_indexer(c, -1);
449	control_end(c);
450	stop_indexer(c);
451}
452
453/* Run indexing for the given namespace, or all namespaces if ns is NULL.
454 *
455 * Returns 0 on success, or -1 on error.
456 */
457int
458run_indexer(struct ctl_conn *c, struct namespace *ns)
459{
460	if (ns == NULL) {
461		c->all = 1;
462		c->ns = next_namespace(NULL);
463	} else {
464		c->all = 0;
465		c->ns = ns;
466	}
467
468	if (c->ns == NULL) {
469		control_end(c);
470	} else {
471		c->closecb = stop_indexer;
472		evtimer_set(&c->ev, continue_indexer, c);
473		continue_indexer(0, 0, c);
474	}
475
476	return 0;
477}
478
479static void
480stop_indexer(struct ctl_conn *c)
481{
482	if (c->ns != NULL) {
483		log_info("stopped indexing namespace %s", c->ns->suffix);
484		c->ns->indexing = 0;
485	}
486	btree_cursor_close(c->cursor);
487	c->cursor = NULL;
488	c->ns = NULL;
489	c->closecb = NULL;
490	evtimer_del(&c->ev);
491}
492
493