hash.c revision 251881
1/* 2 * hash.c : dumping and reading hash tables to/from files. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <stdlib.h> 27#include <limits.h> 28 29#include <apr_version.h> 30#include <apr_pools.h> 31#include <apr_hash.h> 32#include <apr_file_io.h> 33 34#include "svn_types.h" 35#include "svn_string.h" 36#include "svn_error.h" 37#include "svn_hash.h" 38#include "svn_sorts.h" 39#include "svn_io.h" 40#include "svn_pools.h" 41 42#include "private/svn_dep_compat.h" 43#include "private/svn_subr_private.h" 44 45#include "svn_private_config.h" 46 47 48 49 50/* 51 * The format of a dumped hash table is: 52 * 53 * K <nlength> 54 * name (a string of <nlength> bytes, followed by a newline) 55 * V <vlength> 56 * val (a string of <vlength> bytes, followed by a newline) 57 * [... etc, etc ...] 58 * END 59 * 60 * 61 * (Yes, there is a newline after END.) 62 * 63 * For example: 64 * 65 * K 5 66 * color 67 * V 3 68 * red 69 * K 11 70 * wine review 71 * V 376 72 * A forthright entrance, yet coquettish on the tongue, its deceptively 73 * fruity exterior hides the warm mahagony undercurrent that is the 74 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will 75 * be pleased to note the familiar, subtle hints of mulberries and 76 * carburator fluid. Its confident finish is marred only by a barely 77 * detectable suggestion of rancid squid ink. 78 * K 5 79 * price 80 * V 8 81 * US $6.50 82 * END 83 * 84 */ 85 86 87 88 89/*** Dumping and loading hash files. */ 90 91/* Implements svn_hash_read2 and svn_hash_read_incremental. */ 92static svn_error_t * 93hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator, 94 svn_boolean_t incremental, apr_pool_t *pool) 95{ 96 svn_stringbuf_t *buf; 97 svn_boolean_t eof; 98 apr_size_t len, keylen, vallen; 99 char c, *keybuf, *valbuf; 100 apr_pool_t *iterpool = svn_pool_create(pool); 101 102 while (1) 103 { 104 svn_error_t *err; 105 apr_uint64_t ui64; 106 107 svn_pool_clear(iterpool); 108 109 /* Read a key length line. Might be END, though. */ 110 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool)); 111 112 /* Check for the end of the hash. */ 113 if ((!terminator && eof && buf->len == 0) 114 || (terminator && (strcmp(buf->data, terminator) == 0))) 115 break; 116 117 /* Check for unexpected end of stream */ 118 if (eof) 119 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 120 _("Serialized hash missing terminator")); 121 122 if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' ')) 123 { 124 /* Get the length of the key */ 125 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 126 0, APR_SIZE_MAX, 10); 127 if (err) 128 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 129 _("Serialized hash malformed")); 130 keylen = (apr_size_t)ui64; 131 132 /* Now read that much into a buffer. */ 133 keybuf = apr_palloc(pool, keylen + 1); 134 SVN_ERR(svn_stream_read(stream, keybuf, &keylen)); 135 keybuf[keylen] = '\0'; 136 137 /* Suck up extra newline after key data */ 138 len = 1; 139 SVN_ERR(svn_stream_read(stream, &c, &len)); 140 if (c != '\n') 141 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 142 _("Serialized hash malformed")); 143 144 /* Read a val length line */ 145 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool)); 146 147 if ((buf->data[0] == 'V') && (buf->data[1] == ' ')) 148 { 149 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 150 0, APR_SIZE_MAX, 10); 151 if (err) 152 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 153 _("Serialized hash malformed")); 154 vallen = (apr_size_t)ui64; 155 156 valbuf = apr_palloc(iterpool, vallen + 1); 157 SVN_ERR(svn_stream_read(stream, valbuf, &vallen)); 158 valbuf[vallen] = '\0'; 159 160 /* Suck up extra newline after val data */ 161 len = 1; 162 SVN_ERR(svn_stream_read(stream, &c, &len)); 163 if (c != '\n') 164 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 165 _("Serialized hash malformed")); 166 167 /* Add a new hash entry. */ 168 apr_hash_set(hash, keybuf, keylen, 169 svn_string_ncreate(valbuf, vallen, pool)); 170 } 171 else 172 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 173 _("Serialized hash malformed")); 174 } 175 else if (incremental && (buf->len >= 3) 176 && (buf->data[0] == 'D') && (buf->data[1] == ' ')) 177 { 178 /* Get the length of the key */ 179 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 180 0, APR_SIZE_MAX, 10); 181 if (err) 182 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 183 _("Serialized hash malformed")); 184 keylen = (apr_size_t)ui64; 185 186 /* Now read that much into a buffer. */ 187 keybuf = apr_palloc(iterpool, keylen + 1); 188 SVN_ERR(svn_stream_read(stream, keybuf, &keylen)); 189 keybuf[keylen] = '\0'; 190 191 /* Suck up extra newline after key data */ 192 len = 1; 193 SVN_ERR(svn_stream_read(stream, &c, &len)); 194 if (c != '\n') 195 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 196 _("Serialized hash malformed")); 197 198 /* Remove this hash entry. */ 199 apr_hash_set(hash, keybuf, keylen, NULL); 200 } 201 else 202 { 203 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 204 _("Serialized hash malformed")); 205 } 206 } 207 208 svn_pool_destroy(iterpool); 209 return SVN_NO_ERROR; 210} 211 212 213/* Implements svn_hash_write2 and svn_hash_write_incremental. */ 214static svn_error_t * 215hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream, 216 const char *terminator, apr_pool_t *pool) 217{ 218 apr_pool_t *subpool; 219 apr_size_t len; 220 apr_array_header_t *list; 221 int i; 222 223 subpool = svn_pool_create(pool); 224 225 list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool); 226 for (i = 0; i < list->nelts; i++) 227 { 228 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 229 svn_string_t *valstr = item->value; 230 231 svn_pool_clear(subpool); 232 233 /* Don't output entries equal to the ones in oldhash, if present. */ 234 if (oldhash) 235 { 236 svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen); 237 238 if (oldstr && svn_string_compare(valstr, oldstr)) 239 continue; 240 } 241 242 if (item->klen < 0) 243 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 244 _("Cannot serialize negative length")); 245 246 /* Write it out. */ 247 SVN_ERR(svn_stream_printf(stream, subpool, 248 "K %" APR_SIZE_T_FMT "\n%s\n" 249 "V %" APR_SIZE_T_FMT "\n", 250 (apr_size_t) item->klen, 251 (const char *) item->key, 252 valstr->len)); 253 len = valstr->len; 254 SVN_ERR(svn_stream_write(stream, valstr->data, &len)); 255 SVN_ERR(svn_stream_puts(stream, "\n")); 256 } 257 258 if (oldhash) 259 { 260 /* Output a deletion entry for each property in oldhash but not hash. */ 261 list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically, 262 pool); 263 for (i = 0; i < list->nelts; i++) 264 { 265 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 266 267 svn_pool_clear(subpool); 268 269 /* If it's not present in the new hash, write out a D entry. */ 270 if (! apr_hash_get(hash, item->key, item->klen)) 271 SVN_ERR(svn_stream_printf(stream, subpool, 272 "D %" APR_SSIZE_T_FMT "\n%s\n", 273 item->klen, (const char *) item->key)); 274 } 275 } 276 277 if (terminator) 278 SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator)); 279 280 svn_pool_destroy(subpool); 281 return SVN_NO_ERROR; 282} 283 284 285svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream, 286 const char *terminator, apr_pool_t *pool) 287{ 288 return hash_read(hash, stream, terminator, FALSE, pool); 289} 290 291 292svn_error_t *svn_hash_read_incremental(apr_hash_t *hash, 293 svn_stream_t *stream, 294 const char *terminator, 295 apr_pool_t *pool) 296{ 297 return hash_read(hash, stream, terminator, TRUE, pool); 298} 299 300 301svn_error_t * 302svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream, 303 const char *terminator, apr_pool_t *pool) 304{ 305 return hash_write(hash, NULL, stream, terminator, pool); 306} 307 308 309svn_error_t * 310svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash, 311 svn_stream_t *stream, const char *terminator, 312 apr_pool_t *pool) 313{ 314 SVN_ERR_ASSERT(oldhash != NULL); 315 return hash_write(hash, oldhash, stream, terminator, pool); 316} 317 318 319svn_error_t * 320svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool) 321{ 322 return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool), 323 SVN_HASH_TERMINATOR, pool); 324} 325 326 327/* There are enough quirks in the deprecated svn_hash_read that we 328 should just preserve its implementation. */ 329svn_error_t * 330svn_hash_read(apr_hash_t *hash, 331 apr_file_t *srcfile, 332 apr_pool_t *pool) 333{ 334 svn_error_t *err; 335 char buf[SVN_KEYLINE_MAXLEN]; 336 apr_size_t num_read; 337 char c; 338 int first_time = 1; 339 340 341 while (1) 342 { 343 /* Read a key length line. Might be END, though. */ 344 apr_size_t len = sizeof(buf); 345 346 err = svn_io_read_length_line(srcfile, buf, &len, pool); 347 if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time) 348 { 349 /* We got an EOF on our very first attempt to read, which 350 means it's a zero-byte file. No problem, just go home. */ 351 svn_error_clear(err); 352 return SVN_NO_ERROR; 353 } 354 else if (err) 355 /* Any other circumstance is a genuine error. */ 356 return err; 357 358 first_time = 0; 359 360 if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D')) 361 || ((len == 9) 362 && (buf[0] == 'P') 363 && (buf[1] == 'R') /* We formerly used just "END" to */ 364 && (buf[2] == 'O') /* end a property hash, but later */ 365 && (buf[3] == 'P') /* we added "PROPS-END", so that */ 366 && (buf[4] == 'S') /* the fs dump format would be */ 367 && (buf[5] == '-') /* more human-readable. That's */ 368 && (buf[6] == 'E') /* why we accept either way here. */ 369 && (buf[7] == 'N') 370 && (buf[8] == 'D'))) 371 { 372 /* We've reached the end of the dumped hash table, so leave. */ 373 return SVN_NO_ERROR; 374 } 375 else if ((buf[0] == 'K') && (buf[1] == ' ')) 376 { 377 size_t keylen; 378 int parsed_len; 379 void *keybuf; 380 381 /* Get the length of the key */ 382 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 383 keylen = parsed_len; 384 385 /* Now read that much into a buffer, + 1 byte for null terminator */ 386 keybuf = apr_palloc(pool, keylen + 1); 387 SVN_ERR(svn_io_file_read_full2(srcfile, 388 keybuf, keylen, 389 &num_read, NULL, pool)); 390 ((char *) keybuf)[keylen] = '\0'; 391 392 /* Suck up extra newline after key data */ 393 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 394 if (c != '\n') 395 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 396 397 /* Read a val length line */ 398 len = sizeof(buf); 399 SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool)); 400 401 if ((buf[0] == 'V') && (buf[1] == ' ')) 402 { 403 svn_string_t *value = apr_palloc(pool, sizeof(*value)); 404 apr_size_t vallen; 405 void *valbuf; 406 407 /* Get the length of the value */ 408 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 409 vallen = parsed_len; 410 411 /* Again, 1 extra byte for the null termination. */ 412 valbuf = apr_palloc(pool, vallen + 1); 413 SVN_ERR(svn_io_file_read_full2(srcfile, 414 valbuf, vallen, 415 &num_read, NULL, pool)); 416 ((char *) valbuf)[vallen] = '\0'; 417 418 /* Suck up extra newline after val data */ 419 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 420 if (c != '\n') 421 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 422 423 value->data = valbuf; 424 value->len = vallen; 425 426 /* The Grand Moment: add a new hash entry! */ 427 apr_hash_set(hash, keybuf, keylen, value); 428 } 429 else 430 { 431 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 432 } 433 } 434 else 435 { 436 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 437 } 438 } /* while (1) */ 439} 440 441 442 443/*** Diffing hashes ***/ 444 445svn_error_t * 446svn_hash_diff(apr_hash_t *hash_a, 447 apr_hash_t *hash_b, 448 svn_hash_diff_func_t diff_func, 449 void *diff_func_baton, 450 apr_pool_t *pool) 451{ 452 apr_hash_index_t *hi; 453 454 if (hash_a) 455 for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi)) 456 { 457 const void *key; 458 apr_ssize_t klen; 459 460 apr_hash_this(hi, &key, &klen, NULL); 461 462 if (hash_b && (apr_hash_get(hash_b, key, klen))) 463 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both, 464 diff_func_baton)); 465 else 466 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a, 467 diff_func_baton)); 468 } 469 470 if (hash_b) 471 for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi)) 472 { 473 const void *key; 474 apr_ssize_t klen; 475 476 apr_hash_this(hi, &key, &klen, NULL); 477 478 if (! (hash_a && apr_hash_get(hash_a, key, klen))) 479 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b, 480 diff_func_baton)); 481 } 482 483 return SVN_NO_ERROR; 484} 485 486 487/*** Misc. hash APIs ***/ 488 489svn_error_t * 490svn_hash_keys(apr_array_header_t **array, 491 apr_hash_t *hash, 492 apr_pool_t *pool) 493{ 494 apr_hash_index_t *hi; 495 496 *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *)); 497 498 for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi)) 499 { 500 APR_ARRAY_PUSH(*array, const char *) = svn__apr_hash_index_key(hi); 501 } 502 503 return SVN_NO_ERROR; 504} 505 506 507svn_error_t * 508svn_hash_from_cstring_keys(apr_hash_t **hash_p, 509 const apr_array_header_t *keys, 510 apr_pool_t *pool) 511{ 512 int i; 513 apr_hash_t *hash = svn_hash__make(pool); 514 for (i = 0; i < keys->nelts; i++) 515 { 516 const char *key = 517 apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *)); 518 svn_hash_sets(hash, key, key); 519 } 520 *hash_p = hash; 521 return SVN_NO_ERROR; 522} 523 524 525#if !APR_VERSION_AT_LEAST(1, 3, 0) 526void 527svn_hash__clear(apr_hash_t *hash) 528{ 529 apr_hash_index_t *hi; 530 const void *key; 531 apr_ssize_t klen; 532 533 for (hi = apr_hash_first(NULL, hash); hi; hi = apr_hash_next(hi)) 534 { 535 apr_hash_this(hi, &key, &klen, NULL); 536 apr_hash_set(hash, key, klen, NULL); 537 } 538} 539#endif 540 541 542 543/*** Specialized getter APIs ***/ 544 545const char * 546svn_hash__get_cstring(apr_hash_t *hash, 547 const char *key, 548 const char *default_value) 549{ 550 if (hash) 551 { 552 const char *value = svn_hash_gets(hash, key); 553 return value ? value : default_value; 554 } 555 556 return default_value; 557} 558 559 560svn_boolean_t 561svn_hash__get_bool(apr_hash_t *hash, const char *key, 562 svn_boolean_t default_value) 563{ 564 const char *tmp_value = svn_hash__get_cstring(hash, key, NULL); 565 svn_tristate_t value = svn_tristate__from_word(tmp_value); 566 567 if (value == svn_tristate_true) 568 return TRUE; 569 else if (value == svn_tristate_false) 570 return FALSE; 571 572 return default_value; 573} 574 575 576 577/*** Optimized hash function ***/ 578 579/* Optimized version of apr_hashfunc_default in APR 1.4.5 and earlier. 580 * It assumes that the CPU has 32-bit multiplications with high throughput 581 * of at least 1 operation every 3 cycles. Latency is not an issue. Another 582 * optimization is a mildly unrolled main loop and breaking the dependency 583 * chain within the loop. 584 * 585 * Note that most CPUs including Intel Atom, VIA Nano, ARM feature the 586 * assumed pipelined multiplication circuitry. They can do one MUL every 587 * or every other cycle. 588 * 589 * The performance is ultimately limited by the fact that most CPUs can 590 * do only one LOAD and only one BRANCH operation per cycle. The best we 591 * can do is to process one character per cycle - provided the processor 592 * is wide enough to do 1 LOAD, COMPARE, BRANCH, MUL and ADD per cycle. 593 */ 594static unsigned int 595hashfunc_compatible(const char *char_key, apr_ssize_t *klen) 596{ 597 unsigned int hash = 0; 598 const unsigned char *key = (const unsigned char *)char_key; 599 const unsigned char *p; 600 apr_ssize_t i; 601 602 if (*klen == APR_HASH_KEY_STRING) 603 { 604 for (p = key; ; p+=4) 605 { 606 unsigned int new_hash = hash * 33 * 33 * 33 * 33; 607 if (!p[0]) break; 608 new_hash += p[0] * 33 * 33 * 33; 609 if (!p[1]) break; 610 new_hash += p[1] * 33 * 33; 611 if (!p[2]) break; 612 new_hash += p[2] * 33; 613 if (!p[3]) break; 614 hash = new_hash + p[3]; 615 } 616 for (; *p; p++) 617 hash = hash * 33 + *p; 618 619 *klen = p - key; 620 } 621 else 622 { 623 for (p = key, i = *klen; i >= 4; i-=4, p+=4) 624 { 625 hash = hash * 33 * 33 * 33 * 33 626 + p[0] * 33 * 33 * 33 627 + p[1] * 33 * 33 628 + p[2] * 33 629 + p[3]; 630 } 631 for (; i; i--, p++) 632 hash = hash * 33 + *p; 633 } 634 635 return hash; 636} 637 638apr_hash_t * 639svn_hash__make(apr_pool_t *pool) 640{ 641 return apr_hash_make_custom(pool, hashfunc_compatible); 642} 643