hash.c revision 289180
1/* 2 * hash.c : dumping and reading hash tables to/from files. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <stdlib.h> 27#include <limits.h> 28 29#include <apr_version.h> 30#include <apr_pools.h> 31#include <apr_hash.h> 32#include <apr_file_io.h> 33 34#include "svn_types.h" 35#include "svn_string.h" 36#include "svn_error.h" 37#include "svn_hash.h" 38#include "svn_sorts.h" 39#include "svn_io.h" 40#include "svn_pools.h" 41 42#include "private/svn_dep_compat.h" 43#include "private/svn_sorts_private.h" 44#include "private/svn_subr_private.h" 45 46#include "svn_private_config.h" 47 48 49 50 51/* 52 * The format of a dumped hash table is: 53 * 54 * K <nlength> 55 * name (a string of <nlength> bytes, followed by a newline) 56 * V <vlength> 57 * val (a string of <vlength> bytes, followed by a newline) 58 * [... etc, etc ...] 59 * END 60 * 61 * 62 * (Yes, there is a newline after END.) 63 * 64 * For example: 65 * 66 * K 5 67 * color 68 * V 3 69 * red 70 * K 11 71 * wine review 72 * V 376 73 * A forthright entrance, yet coquettish on the tongue, its deceptively 74 * fruity exterior hides the warm mahagony undercurrent that is the 75 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will 76 * be pleased to note the familiar, subtle hints of mulberries and 77 * carburator fluid. Its confident finish is marred only by a barely 78 * detectable suggestion of rancid squid ink. 79 * K 5 80 * price 81 * V 8 82 * US $6.50 83 * END 84 * 85 */ 86 87 88 89 90/*** Dumping and loading hash files. */ 91 92/* Implements svn_hash_read2 and svn_hash_read_incremental. */ 93svn_error_t * 94svn_hash__read_entry(svn_hash__entry_t *entry, 95 svn_stream_t *stream, 96 const char *terminator, 97 svn_boolean_t incremental, 98 apr_pool_t *pool) 99{ 100 svn_stringbuf_t *buf; 101 svn_boolean_t eof; 102 apr_size_t len; 103 char c; 104 105 svn_error_t *err; 106 apr_uint64_t ui64; 107 108 /* Read a key length line. Might be END, though. */ 109 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); 110 111 /* Check for the end of the hash. */ 112 if ((!terminator && eof && buf->len == 0) 113 || (terminator && (strcmp(buf->data, terminator) == 0))) 114 { 115 entry->key = NULL; 116 entry->keylen = 0; 117 entry->val = NULL; 118 entry->vallen = 0; 119 120 return SVN_NO_ERROR; 121 } 122 123 /* Check for unexpected end of stream */ 124 if (eof) 125 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 126 _("Serialized hash missing terminator")); 127 128 if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' ')) 129 { 130 /* Get the length of the key */ 131 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 132 0, APR_SIZE_MAX, 10); 133 if (err) 134 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 135 _("Serialized hash malformed key length")); 136 entry->keylen = (apr_size_t)ui64; 137 138 /* Now read that much into a buffer. */ 139 entry->key = apr_palloc(pool, entry->keylen + 1); 140 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); 141 entry->key[entry->keylen] = '\0'; 142 143 /* Suck up extra newline after key data */ 144 len = 1; 145 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 146 if (c != '\n') 147 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 148 _("Serialized hash malformed key data")); 149 150 /* Read a val length line */ 151 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); 152 153 if ((buf->data[0] == 'V') && (buf->data[1] == ' ')) 154 { 155 /* Get the length of the val */ 156 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 157 0, APR_SIZE_MAX, 10); 158 if (err) 159 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 160 _("Serialized hash malformed value length")); 161 entry->vallen = (apr_size_t)ui64; 162 163 entry->val = apr_palloc(pool, entry->vallen + 1); 164 SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen)); 165 entry->val[entry->vallen] = '\0'; 166 167 /* Suck up extra newline after val data */ 168 len = 1; 169 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 170 if (c != '\n') 171 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 172 _("Serialized hash malformed value data")); 173 } 174 else 175 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 176 _("Serialized hash malformed")); 177 } 178 else if (incremental && (buf->len >= 3) 179 && (buf->data[0] == 'D') && (buf->data[1] == ' ')) 180 { 181 /* Get the length of the key */ 182 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 183 0, APR_SIZE_MAX, 10); 184 if (err) 185 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 186 _("Serialized hash malformed key length")); 187 entry->keylen = (apr_size_t)ui64; 188 189 /* Now read that much into a buffer. */ 190 entry->key = apr_palloc(pool, entry->keylen + 1); 191 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); 192 entry->key[entry->keylen] = '\0'; 193 194 /* Suck up extra newline after key data */ 195 len = 1; 196 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 197 if (c != '\n') 198 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 199 _("Serialized hash malformed key data")); 200 201 /* Remove this hash entry. */ 202 entry->vallen = 0; 203 entry->val = NULL; 204 } 205 else 206 { 207 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 208 _("Serialized hash malformed")); 209 } 210 211 return SVN_NO_ERROR; 212} 213 214static svn_error_t * 215hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator, 216 svn_boolean_t incremental, apr_pool_t *pool) 217{ 218 apr_pool_t *iterpool = svn_pool_create(pool); 219 220 while (1) 221 { 222 svn_hash__entry_t entry; 223 224 svn_pool_clear(iterpool); 225 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator, 226 incremental, iterpool)); 227 228 /* end of hash? */ 229 if (entry.key == NULL) 230 break; 231 232 if (entry.val) 233 { 234 /* Add a new hash entry. */ 235 apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen), 236 entry.keylen, 237 svn_string_ncreate(entry.val, entry.vallen, pool)); 238 } 239 else 240 { 241 /* Remove this hash entry. */ 242 apr_hash_set(hash, entry.key, entry.keylen, NULL); 243 } 244 } 245 246 svn_pool_destroy(iterpool); 247 return SVN_NO_ERROR; 248} 249 250 251/* Implements svn_hash_write2 and svn_hash_write_incremental. */ 252static svn_error_t * 253hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream, 254 const char *terminator, apr_pool_t *pool) 255{ 256 apr_pool_t *subpool; 257 apr_size_t len; 258 apr_array_header_t *list; 259 int i; 260 261 subpool = svn_pool_create(pool); 262 263 list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool); 264 for (i = 0; i < list->nelts; i++) 265 { 266 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 267 svn_string_t *valstr = item->value; 268 269 svn_pool_clear(subpool); 270 271 /* Don't output entries equal to the ones in oldhash, if present. */ 272 if (oldhash) 273 { 274 svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen); 275 276 if (oldstr && svn_string_compare(valstr, oldstr)) 277 continue; 278 } 279 280 if (item->klen < 0) 281 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 282 _("Cannot serialize negative length")); 283 284 /* Write it out. */ 285 SVN_ERR(svn_stream_printf(stream, subpool, 286 "K %" APR_SIZE_T_FMT "\n%s\n" 287 "V %" APR_SIZE_T_FMT "\n", 288 (apr_size_t) item->klen, 289 (const char *) item->key, 290 valstr->len)); 291 len = valstr->len; 292 SVN_ERR(svn_stream_write(stream, valstr->data, &len)); 293 SVN_ERR(svn_stream_puts(stream, "\n")); 294 } 295 296 if (oldhash) 297 { 298 /* Output a deletion entry for each property in oldhash but not hash. */ 299 list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically, 300 pool); 301 for (i = 0; i < list->nelts; i++) 302 { 303 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 304 305 svn_pool_clear(subpool); 306 307 /* If it's not present in the new hash, write out a D entry. */ 308 if (! apr_hash_get(hash, item->key, item->klen)) 309 SVN_ERR(svn_stream_printf(stream, subpool, 310 "D %" APR_SSIZE_T_FMT "\n%s\n", 311 item->klen, (const char *) item->key)); 312 } 313 } 314 315 if (terminator) 316 SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator)); 317 318 svn_pool_destroy(subpool); 319 return SVN_NO_ERROR; 320} 321 322 323svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream, 324 const char *terminator, apr_pool_t *pool) 325{ 326 return hash_read(hash, stream, terminator, FALSE, pool); 327} 328 329 330svn_error_t *svn_hash_read_incremental(apr_hash_t *hash, 331 svn_stream_t *stream, 332 const char *terminator, 333 apr_pool_t *pool) 334{ 335 return hash_read(hash, stream, terminator, TRUE, pool); 336} 337 338 339svn_error_t * 340svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream, 341 const char *terminator, apr_pool_t *pool) 342{ 343 return hash_write(hash, NULL, stream, terminator, pool); 344} 345 346 347svn_error_t * 348svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash, 349 svn_stream_t *stream, const char *terminator, 350 apr_pool_t *pool) 351{ 352 SVN_ERR_ASSERT(oldhash != NULL); 353 return hash_write(hash, oldhash, stream, terminator, pool); 354} 355 356 357svn_error_t * 358svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool) 359{ 360 return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool), 361 SVN_HASH_TERMINATOR, pool); 362} 363 364 365/* There are enough quirks in the deprecated svn_hash_read that we 366 should just preserve its implementation. */ 367svn_error_t * 368svn_hash_read(apr_hash_t *hash, 369 apr_file_t *srcfile, 370 apr_pool_t *pool) 371{ 372 svn_error_t *err; 373 char buf[SVN_KEYLINE_MAXLEN]; 374 apr_size_t num_read; 375 char c; 376 int first_time = 1; 377 378 379 while (1) 380 { 381 /* Read a key length line. Might be END, though. */ 382 apr_size_t len = sizeof(buf); 383 384 err = svn_io_read_length_line(srcfile, buf, &len, pool); 385 if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time) 386 { 387 /* We got an EOF on our very first attempt to read, which 388 means it's a zero-byte file. No problem, just go home. */ 389 svn_error_clear(err); 390 return SVN_NO_ERROR; 391 } 392 else if (err) 393 /* Any other circumstance is a genuine error. */ 394 return err; 395 396 first_time = 0; 397 398 if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D')) 399 || ((len == 9) 400 && (buf[0] == 'P') 401 && (buf[1] == 'R') /* We formerly used just "END" to */ 402 && (buf[2] == 'O') /* end a property hash, but later */ 403 && (buf[3] == 'P') /* we added "PROPS-END", so that */ 404 && (buf[4] == 'S') /* the fs dump format would be */ 405 && (buf[5] == '-') /* more human-readable. That's */ 406 && (buf[6] == 'E') /* why we accept either way here. */ 407 && (buf[7] == 'N') 408 && (buf[8] == 'D'))) 409 { 410 /* We've reached the end of the dumped hash table, so leave. */ 411 return SVN_NO_ERROR; 412 } 413 else if ((buf[0] == 'K') && (buf[1] == ' ')) 414 { 415 size_t keylen; 416 int parsed_len; 417 void *keybuf; 418 419 /* Get the length of the key */ 420 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 421 keylen = parsed_len; 422 423 /* Now read that much into a buffer, + 1 byte for null terminator */ 424 keybuf = apr_palloc(pool, keylen + 1); 425 SVN_ERR(svn_io_file_read_full2(srcfile, 426 keybuf, keylen, 427 &num_read, NULL, pool)); 428 ((char *) keybuf)[keylen] = '\0'; 429 430 /* Suck up extra newline after key data */ 431 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 432 if (c != '\n') 433 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 434 435 /* Read a val length line */ 436 len = sizeof(buf); 437 SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool)); 438 439 if ((buf[0] == 'V') && (buf[1] == ' ')) 440 { 441 svn_string_t *value = apr_palloc(pool, sizeof(*value)); 442 apr_size_t vallen; 443 void *valbuf; 444 445 /* Get the length of the value */ 446 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 447 vallen = parsed_len; 448 449 /* Again, 1 extra byte for the null termination. */ 450 valbuf = apr_palloc(pool, vallen + 1); 451 SVN_ERR(svn_io_file_read_full2(srcfile, 452 valbuf, vallen, 453 &num_read, NULL, pool)); 454 ((char *) valbuf)[vallen] = '\0'; 455 456 /* Suck up extra newline after val data */ 457 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 458 if (c != '\n') 459 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 460 461 value->data = valbuf; 462 value->len = vallen; 463 464 /* The Grand Moment: add a new hash entry! */ 465 apr_hash_set(hash, keybuf, keylen, value); 466 } 467 else 468 { 469 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 470 } 471 } 472 else 473 { 474 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 475 } 476 } /* while (1) */ 477} 478 479 480 481/*** Diffing hashes ***/ 482 483svn_error_t * 484svn_hash_diff(apr_hash_t *hash_a, 485 apr_hash_t *hash_b, 486 svn_hash_diff_func_t diff_func, 487 void *diff_func_baton, 488 apr_pool_t *pool) 489{ 490 apr_hash_index_t *hi; 491 492 if (hash_a) 493 for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi)) 494 { 495 const void *key; 496 apr_ssize_t klen; 497 498 apr_hash_this(hi, &key, &klen, NULL); 499 500 if (hash_b && (apr_hash_get(hash_b, key, klen))) 501 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both, 502 diff_func_baton)); 503 else 504 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a, 505 diff_func_baton)); 506 } 507 508 if (hash_b) 509 for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi)) 510 { 511 const void *key; 512 apr_ssize_t klen; 513 514 apr_hash_this(hi, &key, &klen, NULL); 515 516 if (! (hash_a && apr_hash_get(hash_a, key, klen))) 517 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b, 518 diff_func_baton)); 519 } 520 521 return SVN_NO_ERROR; 522} 523 524 525/*** Misc. hash APIs ***/ 526 527svn_error_t * 528svn_hash_keys(apr_array_header_t **array, 529 apr_hash_t *hash, 530 apr_pool_t *pool) 531{ 532 apr_hash_index_t *hi; 533 534 *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *)); 535 536 for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi)) 537 { 538 APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi); 539 } 540 541 return SVN_NO_ERROR; 542} 543 544 545svn_error_t * 546svn_hash_from_cstring_keys(apr_hash_t **hash_p, 547 const apr_array_header_t *keys, 548 apr_pool_t *pool) 549{ 550 int i; 551 apr_hash_t *hash = svn_hash__make(pool); 552 for (i = 0; i < keys->nelts; i++) 553 { 554 const char *key = 555 apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *)); 556 svn_hash_sets(hash, key, key); 557 } 558 *hash_p = hash; 559 return SVN_NO_ERROR; 560} 561 562 563 564/*** Specialized getter APIs ***/ 565 566const char * 567svn_hash__get_cstring(apr_hash_t *hash, 568 const char *key, 569 const char *default_value) 570{ 571 if (hash) 572 { 573 const char *value = svn_hash_gets(hash, key); 574 return value ? value : default_value; 575 } 576 577 return default_value; 578} 579 580 581svn_boolean_t 582svn_hash__get_bool(apr_hash_t *hash, const char *key, 583 svn_boolean_t default_value) 584{ 585 const char *tmp_value = svn_hash__get_cstring(hash, key, NULL); 586 svn_tristate_t value = svn_tristate__from_word(tmp_value); 587 588 if (value == svn_tristate_true) 589 return TRUE; 590 else if (value == svn_tristate_false) 591 return FALSE; 592 593 return default_value; 594} 595 596 597 598/*** Optimized hash function ***/ 599 600/* apr_hashfunc_t optimized for the key that we use in SVN: paths and 601 * property names. Its primary goal is speed for keys of known length. 602 * 603 * Since strings tend to spawn large value spaces (usually differ in many 604 * bits with differences spanning a larger section of the key), we can be 605 * quite sloppy extracting a hash value. The more keys there are in a 606 * hash container, the more bits of the value returned by this function 607 * will be used. For a small number of string keys, choosing bits from any 608 * any fix location close to the tail of those keys would usually be good 609 * enough to prevent high collision rates. 610 */ 611static unsigned int 612hashfunc_compatible(const char *char_key, apr_ssize_t *klen) 613{ 614 unsigned int hash = 0; 615 const unsigned char *key = (const unsigned char *)char_key; 616 const unsigned char *p; 617 apr_ssize_t i; 618 619 if (*klen == APR_HASH_KEY_STRING) 620 *klen = strlen(char_key); 621 622#if SVN_UNALIGNED_ACCESS_IS_OK 623 for (p = key, i = *klen; i >= 4; i-=4, p+=4) 624 { 625 apr_uint32_t chunk = *(const apr_uint32_t *)p; 626 627 /* the ">> 17" part gives upper bits in the chunk a chance to make 628 some impact as well */ 629 hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17); 630 } 631#else 632 for (p = key, i = *klen; i >= 4; i-=4, p+=4) 633 { 634 hash = hash * 33 * 33 * 33 * 33 635 + p[0] * 33 * 33 * 33 636 + p[1] * 33 * 33 637 + p[2] * 33 638 + p[3]; 639 } 640#endif 641 for (; i; i--, p++) 642 hash = hash * 33 + *p; 643 644 return hash; 645} 646 647apr_hash_t * 648svn_hash__make(apr_pool_t *pool) 649{ 650 return apr_hash_make_custom(pool, hashfunc_compatible); 651} 652