index.c revision 1.4
1/* $OpenBSD: index.c,v 1.4 2010/06/11 08:45:06 martinh Exp $ */ 2 3/* 4 * Copyright (c) 2009 Martin Hedenfalk <martin@bzero.se> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19/* Indices are stored as unique keys in a btree. No data is stored. 20 * The keys are made up of the attribute being indexed, concatenated 21 * with the distinguished name of the entry. The namespace suffix is 22 * stripped, however. 23 * 24 * Below, the namespace suffix dc=example,dc=com is stripped. 25 * 26 * Index b-tree sorts with plain strcmp: 27 * ... 28 * cn=Chunky Bacon,cn=chunky bacon,ou=people, 29 * cn=Chunky Bacon,uid=cbacon,ou=accounts, 30 * cn=Chunky Beans,cn=chunky beans,ou=people, 31 * cn=Chunky Beans,uid=cbeans,ou=accounts, 32 * cn=Crispy Bacon,cn=crispy bacon,ou=people, 33 * ... 34 * sn=Bacon,cn=chunky bacon,ou=people, 35 * sn=Bacon,cn=crispy bacon,ou=people, 36 * sn=Beans,cn=chunky beans,ou=people, 37 * ... 38 * This index can be used for equality, prefix and range searches. 39 * 40 * If an indexed attribute sorts numerically (integer), we might be able 41 * to just pad it with zeros... otherwise this needs to be refined. 42 * 43 * Multiple attributes can be indexed in the same database. 44 * 45 * Presence index can be stored as: 46 * !mail,cn=chunky bacon,ou=people, 47 * !mail,cn=chunky beans,ou=people, 48 * !mail,cn=crispy bacon,ou=people, 49 * 50 * Substring index could probably be stored like a suffix tree: 51 * sn>Bacon,cn=chunky bacon,ou=people, 52 * sn>acon,cn=chunky bacon,ou=people, 53 * sn>con,cn=chunky bacon,ou=people, 54 * sn>on,cn=chunky bacon,ou=people, 55 * sn>n,cn=chunky bacon,ou=people, 56 * 57 * This facilitates both substring and suffix search. 58 * 59 * Approximate index: 60 * sn~[soundex(bacon)],cn=chunky bacon,ou=people, 61 * 62 * One level searches can be indexed as follows: 63 * @<parent-rdn>,<rdn> 64 * example: 65 * @ou=accounts,uid=cbacon 66 * @ou=accounts,uid=cbeans 67 * @ou=people,cn=chunky bacon 68 * @ou=people,cn=chunky beans 69 * @ou=people,cn=crispy bacon 70 * 71 */ 72 73#include <sys/types.h> 74#include <sys/queue.h> 75 76#include <assert.h> 77#include <stdlib.h> 78#include <string.h> 79 80#include "ldapd.h" 81 82static void continue_indexer(int fd, short why, void *arg); 83static void stop_indexer(struct ctl_conn *c); 84 85int 86index_attribute(struct namespace *ns, char *attr, struct btval *dn, 87 struct ber_element *a) 88{ 89 int dnsz, rc; 90 char *s, *t; 91 struct ber_element *v; 92 struct btval key, val; 93 94 assert(ns); 95 assert(ns->indx_txn); 96 assert(attr); 97 assert(dn); 98 assert(a); 99 assert(a->be_next); 100 bzero(&val, sizeof(val)); 101 102 log_debug("indexing %.*s on %s", (int)dn->size, (char *)dn->data, attr); 103 104 dnsz = dn->size - strlen(ns->suffix); 105 106 for (v = a->be_next->be_sub; v; v = v->be_next) { 107 if (ber_get_string(v, &s) != 0) 108 continue; 109 bzero(&key, sizeof(key)); 110 key.size = asprintf(&t, "%s=%s,%.*s", attr, s, dnsz, 111 (char *)dn->data); 112 key.data = t; 113 normalize_dn(key.data); 114 rc = btree_txn_put(NULL, ns->indx_txn, &key, &val, 115 BT_NOOVERWRITE); 116 free(t); 117 if (rc == BT_FAIL) 118 return -1; 119 } 120 121 return 0; 122} 123 124static int 125index_rdn(struct namespace *ns, struct btval *dn) 126{ 127 int dnsz, rdnsz, pdnsz, rc; 128 char *t, *parent_dn; 129 struct btval key, val; 130 131 bzero(&val, sizeof(val)); 132 133 assert(ns); 134 assert(ns->indx_txn); 135 assert(dn); 136 137 dnsz = dn->size - strlen(ns->suffix); 138 if (dnsz-- == 0) 139 return 0; 140 141 parent_dn = memchr(dn->data, ',', dnsz); 142 if (parent_dn == NULL) { 143 rdnsz = dnsz; 144 pdnsz = 0; 145 } else { 146 rdnsz = parent_dn - (char *)dn->data; 147 pdnsz = dnsz - rdnsz - 1; 148 ++parent_dn; 149 } 150 151 key.size = asprintf(&t, "@%.*s,%.*s", pdnsz, parent_dn, 152 rdnsz, (char *)dn->data); 153 key.data = t; 154 log_debug("indexing rdn on %.*s", (int)key.size, (char *)key.data); 155 normalize_dn(key.data); 156 rc = btree_txn_put(NULL, ns->indx_txn, &key, &val, BT_NOOVERWRITE); 157 free(t); 158 if (rc == BT_FAIL) 159 return -1; 160 return 0; 161} 162 163int 164unindex_attribute(struct namespace *ns, char *attr, struct btval *dn, 165 struct ber_element *a) 166{ 167 int dnsz, rc; 168 char *s, *t; 169 struct ber_element *v; 170 struct btval key; 171 172 assert(ns); 173 assert(ns->indx_txn); 174 assert(attr); 175 assert(dn); 176 assert(a); 177 assert(a->be_next); 178 179 log_debug("unindexing %.*s on %s", 180 (int)dn->size, (char *)dn->data, attr); 181 182 dnsz = dn->size - strlen(ns->suffix); 183 184 for (v = a->be_next->be_sub; v; v = v->be_next) { 185 if (ber_get_string(v, &s) != 0) 186 continue; 187 bzero(&key, sizeof(key)); 188 key.size = asprintf(&t, "%s=%s,%.*s", attr, s, dnsz, 189 (char *)dn->data); 190 key.data = t; 191 normalize_dn(key.data); 192 rc = btree_txn_del(NULL, ns->indx_txn, &key, NULL); 193 free(t); 194 if (rc == BT_FAIL) 195 return -1; 196 } 197 198 return 0; 199} 200 201int 202index_entry(struct namespace *ns, struct btval *dn, struct ber_element *elm) 203{ 204 struct ber_element *a; 205 struct attr_index *ai; 206 207 assert(ns); 208 assert(dn); 209 assert(elm); 210 TAILQ_FOREACH(ai, &ns->indices, next) { 211 a = ldap_get_attribute(elm, ai->attr); 212 if (a && index_attribute(ns, ai->attr, dn, a) < 0) 213 return -1; 214 } 215 216 return index_rdn(ns, dn); 217} 218 219static int 220unindex_rdn(struct namespace *ns, struct btval *dn) 221{ 222 int dnsz, rdnsz, rc; 223 char *t, *parent_dn; 224 struct btval key, val; 225 226 bzero(&val, sizeof(val)); 227 228 assert(ns); 229 assert(ns->indx_txn); 230 assert(dn); 231 232 dnsz = dn->size - strlen(ns->suffix); 233 234 parent_dn = memchr(dn->data, ',', dn->size); 235 if (parent_dn++ == NULL) 236 parent_dn = (char *)dn->data + dn->size; 237 rdnsz = parent_dn - (char *)dn->data; 238 239 key.size = asprintf(&t, "@%.*s,%.*s", (dnsz - rdnsz), parent_dn, 240 rdnsz, (char *)dn->data); 241 key.data = t; 242 log_debug("unindexing rdn on %.*s", (int)key.size, (char *)key.data); 243 normalize_dn(key.data); 244 rc = btree_txn_del(NULL, ns->indx_txn, &key, NULL); 245 free(t); 246 if (rc == BT_FAIL) 247 return -1; 248 return 0; 249} 250 251int 252unindex_entry(struct namespace *ns, struct btval *dn, struct ber_element *elm) 253{ 254 struct ber_element *a; 255 struct attr_index *ai; 256 257 assert(ns); 258 assert(dn); 259 assert(elm); 260 TAILQ_FOREACH(ai, &ns->indices, next) { 261 a = ldap_get_attribute(elm, ai->attr); 262 if (a && unindex_attribute(ns, ai->attr, dn, a) < 0) 263 return -1; 264 } 265 266 return unindex_rdn(ns, dn); 267} 268 269/* Reconstruct the full dn from the index key and the namespace suffix. 270 * 271 * Examples: 272 * 273 * index key: 274 * sn=Bacon,cn=chunky bacon,ou=people, 275 * full dn: 276 * cn=chunky bacon,ou=people,dc=example,dc=com 277 * 278 * index key: 279 * @ou=people,cn=chunky bacon 280 * full dn: 281 * cn=chunky bacon,ou=people,dc=example,dc=com 282 * 283 * index key: 284 * @,ou=people 285 * full dn: 286 * ou=people,dc=example,dc=com 287 * 288 * index key: 289 * @ou=staff,ou=people,cn=chunky bacon 290 * full dn: 291 * cn=chunky bacon,ou=staff,ou=people,dc=example,dc=com 292 * 293 * Filled in dn must be freed with btval_reset(). 294 */ 295int 296index_to_dn(struct namespace *ns, struct btval *indx, struct btval *dn) 297{ 298 char *rdn, *parent_rdn, indxtype, *dst; 299 int rdn_sz, prdn_sz; 300 301 /* Skip past first index part to get the RDN. 302 */ 303 304 indxtype = ((char *)indx->data)[0]; 305 if (indxtype == '@') { 306 /* One-level index. 307 * Full DN is made up of rdn + parent rdn + namespace suffix. 308 */ 309 rdn = memrchr(indx->data, ',', indx->size); 310 if (rdn++ == NULL) 311 return -1; 312 rdn_sz = indx->size - (rdn - (char *)indx->data); 313 314 parent_rdn = (char *)indx->data + 1; 315 prdn_sz = rdn - parent_rdn - 1; 316 dn->size = indx->size + strlen(ns->suffix); 317 if (prdn_sz == 0) 318 dn->size--; 319 if ((dn->data = malloc(dn->size)) == NULL) { 320 log_warn("conn_search: malloc"); 321 return -1; 322 } 323 dst = dn->data; 324 bcopy(rdn, dst, rdn_sz); 325 dst += rdn_sz; 326 *dst++ = ','; 327 bcopy(parent_rdn, dst, prdn_sz); 328 dst += prdn_sz; 329 if (prdn_sz > 0) 330 *dst++ = ','; 331 bcopy(ns->suffix, dst, strlen(ns->suffix)); 332 } else { 333 /* Construct the full DN by appending namespace suffix. 334 */ 335 rdn = memchr(indx->data, ',', indx->size); 336 if (rdn++ == NULL) 337 return -1; 338 rdn_sz = indx->size - (rdn - (char *)indx->data); 339 dn->size = rdn_sz + strlen(ns->suffix); 340 if ((dn->data = malloc(dn->size)) == NULL) { 341 log_warn("index_to_dn: malloc"); 342 return -1; 343 } 344 bcopy(rdn, dn->data, rdn_sz); 345 bcopy(ns->suffix, (char *)dn->data + rdn_sz, 346 strlen(ns->suffix)); 347 } 348 349 dn->free_data = 1; 350 351 return 0; 352} 353 354/* Return the next namespace that isn't already being indexed or compacted. 355 */ 356static struct namespace * 357next_namespace(struct namespace *ns) 358{ 359 if (ns == NULL) 360 ns = TAILQ_FIRST(&conf->namespaces); 361 else 362 ns = TAILQ_NEXT(ns, next); 363 364 do { 365 if (ns == NULL || (!ns->indexing && !ns->compacting)) 366 break; 367 } while ((ns = TAILQ_NEXT(ns, next)) != NULL); 368 369 return ns; 370} 371 372static void 373continue_indexer(int fd, short why, void *arg) 374{ 375 struct ctl_conn *c = arg; 376 struct ber_element *elm; 377 struct btval key, val; 378 struct timeval tv; 379 int i, rc = BT_FAIL; 380 381 if (c->cursor == NULL) { 382 log_info("begin indexing namespace %s", c->ns->suffix); 383 c->ncomplete = 0; 384 c->ns->indexing = 1; 385 c->cursor = btree_cursor_open(c->ns->data_db); 386 if (c->cursor == NULL) { 387 log_warn("failed to open cursor"); 388 goto fail; 389 } 390 } 391 392 bzero(&key, sizeof(key)); 393 bzero(&val, sizeof(val)); 394 395 tv.tv_sec = 0; 396 tv.tv_usec = 0; 397 398 if (namespace_begin(c->ns) != 0) { 399 tv.tv_usec = 50000; 400 evtimer_add(&c->ev, &tv); 401 return; 402 } 403 404 for (i = 0; i < 40; i++) { 405 rc = btree_cursor_get(c->cursor, &key, &val, BT_NEXT); 406 if (rc != BT_SUCCESS) 407 break; 408 if ((elm = namespace_db2ber(c->ns, &val)) == NULL) 409 continue; 410 rc = index_entry(c->ns, &key, elm); 411 ber_free_elements(elm); 412 btval_reset(&key); 413 btval_reset(&val); 414 if (rc != 0) 415 goto fail; 416 ++c->ncomplete; 417 } 418 419 if (namespace_commit(c->ns) != 0) 420 goto fail; 421 422 control_report_indexer(c, 0); 423 424 if (rc == BT_NOTFOUND) { 425 log_info("done indexing namespace %s", c->ns->suffix); 426 btree_cursor_close(c->cursor); 427 c->cursor = NULL; 428 c->ns->indexing = 0; 429 control_report_indexer(c, 1); 430 431 if (c->all) 432 c->ns = next_namespace(c->ns); 433 else 434 c->ns = NULL; 435 436 if (c->ns == NULL) { 437 log_info("done indexing all namespaces"); 438 return; 439 } 440 } else if (rc != BT_SUCCESS) 441 goto fail; 442 443 evtimer_add(&c->ev, &tv); 444 return; 445 446fail: 447 if (c->ns != NULL) 448 control_report_indexer(c, -1); 449 control_end(c); 450 stop_indexer(c); 451} 452 453/* Run indexing for the given namespace, or all namespaces if ns is NULL. 454 * 455 * Returns 0 on success, or -1 on error. 456 */ 457int 458run_indexer(struct ctl_conn *c, struct namespace *ns) 459{ 460 if (ns == NULL) { 461 c->all = 1; 462 c->ns = next_namespace(NULL); 463 } else { 464 c->all = 0; 465 c->ns = ns; 466 } 467 468 if (c->ns == NULL) { 469 control_end(c); 470 } else { 471 c->closecb = stop_indexer; 472 evtimer_set(&c->ev, continue_indexer, c); 473 continue_indexer(0, 0, c); 474 } 475 476 return 0; 477} 478 479static void 480stop_indexer(struct ctl_conn *c) 481{ 482 if (c->ns != NULL) { 483 log_info("stopped indexing namespace %s", c->ns->suffix); 484 c->ns->indexing = 0; 485 } 486 btree_cursor_close(c->cursor); 487 c->cursor = NULL; 488 c->ns = NULL; 489 c->closecb = NULL; 490 evtimer_del(&c->ev); 491} 492 493