hash.c revision 289180
1/*
2 * hash.c :  dumping and reading hash tables to/from files.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <stdlib.h>
27#include <limits.h>
28
29#include <apr_version.h>
30#include <apr_pools.h>
31#include <apr_hash.h>
32#include <apr_file_io.h>
33
34#include "svn_types.h"
35#include "svn_string.h"
36#include "svn_error.h"
37#include "svn_hash.h"
38#include "svn_sorts.h"
39#include "svn_io.h"
40#include "svn_pools.h"
41
42#include "private/svn_dep_compat.h"
43#include "private/svn_sorts_private.h"
44#include "private/svn_subr_private.h"
45
46#include "svn_private_config.h"
47
48
49
50
51/*
52 * The format of a dumped hash table is:
53 *
54 *   K <nlength>
55 *   name (a string of <nlength> bytes, followed by a newline)
56 *   V <vlength>
57 *   val (a string of <vlength> bytes, followed by a newline)
58 *   [... etc, etc ...]
59 *   END
60 *
61 *
62 * (Yes, there is a newline after END.)
63 *
64 * For example:
65 *
66 *   K 5
67 *   color
68 *   V 3
69 *   red
70 *   K 11
71 *   wine review
72 *   V 376
73 *   A forthright entrance, yet coquettish on the tongue, its deceptively
74 *   fruity exterior hides the warm mahagony undercurrent that is the
75 *   hallmark of Chateau Fraisant-Pitre.  Connoisseurs of the region will
76 *   be pleased to note the familiar, subtle hints of mulberries and
77 *   carburator fluid.  Its confident finish is marred only by a barely
78 *   detectable suggestion of rancid squid ink.
79 *   K 5
80 *   price
81 *   V 8
82 *   US $6.50
83 *   END
84 *
85 */
86
87
88
89
90/*** Dumping and loading hash files. */
91
92/* Implements svn_hash_read2 and svn_hash_read_incremental. */
93svn_error_t *
94svn_hash__read_entry(svn_hash__entry_t *entry,
95                     svn_stream_t *stream,
96                     const char *terminator,
97                     svn_boolean_t incremental,
98                     apr_pool_t *pool)
99{
100  svn_stringbuf_t *buf;
101  svn_boolean_t eof;
102  apr_size_t len;
103  char c;
104
105  svn_error_t *err;
106  apr_uint64_t ui64;
107
108  /* Read a key length line.  Might be END, though. */
109  SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
110
111  /* Check for the end of the hash. */
112  if ((!terminator && eof && buf->len == 0)
113      || (terminator && (strcmp(buf->data, terminator) == 0)))
114  {
115    entry->key = NULL;
116    entry->keylen = 0;
117    entry->val = NULL;
118    entry->vallen = 0;
119
120    return SVN_NO_ERROR;
121  }
122
123  /* Check for unexpected end of stream */
124  if (eof)
125    return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
126                            _("Serialized hash missing terminator"));
127
128  if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
129    {
130      /* Get the length of the key */
131      err = svn_cstring_strtoui64(&ui64, buf->data + 2,
132                                  0, APR_SIZE_MAX, 10);
133      if (err)
134        return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
135                                _("Serialized hash malformed key length"));
136      entry->keylen = (apr_size_t)ui64;
137
138      /* Now read that much into a buffer. */
139      entry->key = apr_palloc(pool, entry->keylen + 1);
140      SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
141      entry->key[entry->keylen] = '\0';
142
143      /* Suck up extra newline after key data */
144      len = 1;
145      SVN_ERR(svn_stream_read_full(stream, &c, &len));
146      if (c != '\n')
147        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
148                                _("Serialized hash malformed key data"));
149
150      /* Read a val length line */
151      SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
152
153      if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
154        {
155          /* Get the length of the val */
156          err = svn_cstring_strtoui64(&ui64, buf->data + 2,
157                                      0, APR_SIZE_MAX, 10);
158          if (err)
159            return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
160                                    _("Serialized hash malformed value length"));
161          entry->vallen = (apr_size_t)ui64;
162
163          entry->val = apr_palloc(pool, entry->vallen + 1);
164          SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen));
165          entry->val[entry->vallen] = '\0';
166
167          /* Suck up extra newline after val data */
168          len = 1;
169          SVN_ERR(svn_stream_read_full(stream, &c, &len));
170          if (c != '\n')
171            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
172                                    _("Serialized hash malformed value data"));
173        }
174      else
175        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
176                                _("Serialized hash malformed"));
177    }
178  else if (incremental && (buf->len >= 3)
179           && (buf->data[0] == 'D') && (buf->data[1] == ' '))
180    {
181      /* Get the length of the key */
182      err = svn_cstring_strtoui64(&ui64, buf->data + 2,
183                                  0, APR_SIZE_MAX, 10);
184      if (err)
185        return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
186                                _("Serialized hash malformed key length"));
187      entry->keylen = (apr_size_t)ui64;
188
189      /* Now read that much into a buffer. */
190      entry->key = apr_palloc(pool, entry->keylen + 1);
191      SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
192      entry->key[entry->keylen] = '\0';
193
194      /* Suck up extra newline after key data */
195      len = 1;
196      SVN_ERR(svn_stream_read_full(stream, &c, &len));
197      if (c != '\n')
198        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
199                                _("Serialized hash malformed key data"));
200
201      /* Remove this hash entry. */
202      entry->vallen = 0;
203      entry->val = NULL;
204    }
205  else
206    {
207      return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
208                              _("Serialized hash malformed"));
209    }
210
211  return SVN_NO_ERROR;
212}
213
214static svn_error_t *
215hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
216          svn_boolean_t incremental, apr_pool_t *pool)
217{
218  apr_pool_t *iterpool = svn_pool_create(pool);
219
220  while (1)
221    {
222      svn_hash__entry_t entry;
223
224      svn_pool_clear(iterpool);
225      SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
226                                   incremental, iterpool));
227
228      /* end of hash? */
229      if (entry.key == NULL)
230        break;
231
232      if (entry.val)
233        {
234          /* Add a new hash entry. */
235          apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen),
236                       entry.keylen,
237                       svn_string_ncreate(entry.val, entry.vallen, pool));
238        }
239      else
240        {
241          /* Remove this hash entry. */
242          apr_hash_set(hash, entry.key, entry.keylen, NULL);
243        }
244    }
245
246  svn_pool_destroy(iterpool);
247  return SVN_NO_ERROR;
248}
249
250
251/* Implements svn_hash_write2 and svn_hash_write_incremental. */
252static svn_error_t *
253hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
254           const char *terminator, apr_pool_t *pool)
255{
256  apr_pool_t *subpool;
257  apr_size_t len;
258  apr_array_header_t *list;
259  int i;
260
261  subpool = svn_pool_create(pool);
262
263  list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
264  for (i = 0; i < list->nelts; i++)
265    {
266      svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
267      svn_string_t *valstr = item->value;
268
269      svn_pool_clear(subpool);
270
271      /* Don't output entries equal to the ones in oldhash, if present. */
272      if (oldhash)
273        {
274          svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
275
276          if (oldstr && svn_string_compare(valstr, oldstr))
277            continue;
278        }
279
280      if (item->klen < 0)
281        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
282                                _("Cannot serialize negative length"));
283
284      /* Write it out. */
285      SVN_ERR(svn_stream_printf(stream, subpool,
286                                "K %" APR_SIZE_T_FMT "\n%s\n"
287                                "V %" APR_SIZE_T_FMT "\n",
288                                (apr_size_t) item->klen,
289                                (const char *) item->key,
290                                valstr->len));
291      len = valstr->len;
292      SVN_ERR(svn_stream_write(stream, valstr->data, &len));
293      SVN_ERR(svn_stream_puts(stream, "\n"));
294    }
295
296  if (oldhash)
297    {
298      /* Output a deletion entry for each property in oldhash but not hash. */
299      list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
300                            pool);
301      for (i = 0; i < list->nelts; i++)
302        {
303          svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
304
305          svn_pool_clear(subpool);
306
307          /* If it's not present in the new hash, write out a D entry. */
308          if (! apr_hash_get(hash, item->key, item->klen))
309            SVN_ERR(svn_stream_printf(stream, subpool,
310                                      "D %" APR_SSIZE_T_FMT "\n%s\n",
311                                      item->klen, (const char *) item->key));
312        }
313    }
314
315  if (terminator)
316    SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
317
318  svn_pool_destroy(subpool);
319  return SVN_NO_ERROR;
320}
321
322
323svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
324                            const char *terminator, apr_pool_t *pool)
325{
326  return hash_read(hash, stream, terminator, FALSE, pool);
327}
328
329
330svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
331                                       svn_stream_t *stream,
332                                       const char *terminator,
333                                       apr_pool_t *pool)
334{
335  return hash_read(hash, stream, terminator, TRUE, pool);
336}
337
338
339svn_error_t *
340svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
341                const char *terminator, apr_pool_t *pool)
342{
343  return hash_write(hash, NULL, stream, terminator, pool);
344}
345
346
347svn_error_t *
348svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
349                           svn_stream_t *stream, const char *terminator,
350                           apr_pool_t *pool)
351{
352  SVN_ERR_ASSERT(oldhash != NULL);
353  return hash_write(hash, oldhash, stream, terminator, pool);
354}
355
356
357svn_error_t *
358svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
359{
360  return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
361                    SVN_HASH_TERMINATOR, pool);
362}
363
364
365/* There are enough quirks in the deprecated svn_hash_read that we
366   should just preserve its implementation. */
367svn_error_t *
368svn_hash_read(apr_hash_t *hash,
369              apr_file_t *srcfile,
370              apr_pool_t *pool)
371{
372  svn_error_t *err;
373  char buf[SVN_KEYLINE_MAXLEN];
374  apr_size_t num_read;
375  char c;
376  int first_time = 1;
377
378
379  while (1)
380    {
381      /* Read a key length line.  Might be END, though. */
382      apr_size_t len = sizeof(buf);
383
384      err = svn_io_read_length_line(srcfile, buf, &len, pool);
385      if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
386        {
387          /* We got an EOF on our very first attempt to read, which
388             means it's a zero-byte file.  No problem, just go home. */
389          svn_error_clear(err);
390          return SVN_NO_ERROR;
391        }
392      else if (err)
393        /* Any other circumstance is a genuine error. */
394        return err;
395
396      first_time = 0;
397
398      if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
399          || ((len == 9)
400              && (buf[0] == 'P')
401              && (buf[1] == 'R')       /* We formerly used just "END" to */
402              && (buf[2] == 'O')       /* end a property hash, but later */
403              && (buf[3] == 'P')       /* we added "PROPS-END", so that  */
404              && (buf[4] == 'S')       /* the fs dump format would be    */
405              && (buf[5] == '-')       /* more human-readable.  That's   */
406              && (buf[6] == 'E')       /* why we accept either way here. */
407              && (buf[7] == 'N')
408              && (buf[8] == 'D')))
409        {
410          /* We've reached the end of the dumped hash table, so leave. */
411          return SVN_NO_ERROR;
412        }
413      else if ((buf[0] == 'K') && (buf[1] == ' '))
414        {
415          size_t keylen;
416          int parsed_len;
417          void *keybuf;
418
419          /* Get the length of the key */
420          SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
421          keylen = parsed_len;
422
423          /* Now read that much into a buffer, + 1 byte for null terminator */
424          keybuf = apr_palloc(pool, keylen + 1);
425          SVN_ERR(svn_io_file_read_full2(srcfile,
426                                         keybuf, keylen,
427                                         &num_read, NULL, pool));
428          ((char *) keybuf)[keylen] = '\0';
429
430          /* Suck up extra newline after key data */
431          SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
432          if (c != '\n')
433            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
434
435          /* Read a val length line */
436          len = sizeof(buf);
437          SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
438
439          if ((buf[0] == 'V') && (buf[1] == ' '))
440            {
441              svn_string_t *value = apr_palloc(pool, sizeof(*value));
442              apr_size_t vallen;
443              void *valbuf;
444
445              /* Get the length of the value */
446              SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
447              vallen = parsed_len;
448
449              /* Again, 1 extra byte for the null termination. */
450              valbuf = apr_palloc(pool, vallen + 1);
451              SVN_ERR(svn_io_file_read_full2(srcfile,
452                                             valbuf, vallen,
453                                             &num_read, NULL, pool));
454              ((char *) valbuf)[vallen] = '\0';
455
456              /* Suck up extra newline after val data */
457              SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
458              if (c != '\n')
459                return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
460
461              value->data = valbuf;
462              value->len = vallen;
463
464              /* The Grand Moment:  add a new hash entry! */
465              apr_hash_set(hash, keybuf, keylen, value);
466            }
467          else
468            {
469              return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
470            }
471        }
472      else
473        {
474          return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
475        }
476    } /* while (1) */
477}
478
479
480
481/*** Diffing hashes ***/
482
483svn_error_t *
484svn_hash_diff(apr_hash_t *hash_a,
485              apr_hash_t *hash_b,
486              svn_hash_diff_func_t diff_func,
487              void *diff_func_baton,
488              apr_pool_t *pool)
489{
490  apr_hash_index_t *hi;
491
492  if (hash_a)
493    for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
494      {
495        const void *key;
496        apr_ssize_t klen;
497
498        apr_hash_this(hi, &key, &klen, NULL);
499
500        if (hash_b && (apr_hash_get(hash_b, key, klen)))
501          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
502                               diff_func_baton));
503        else
504          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
505                               diff_func_baton));
506      }
507
508  if (hash_b)
509    for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
510      {
511        const void *key;
512        apr_ssize_t klen;
513
514        apr_hash_this(hi, &key, &klen, NULL);
515
516        if (! (hash_a && apr_hash_get(hash_a, key, klen)))
517          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
518                               diff_func_baton));
519      }
520
521  return SVN_NO_ERROR;
522}
523
524
525/*** Misc. hash APIs ***/
526
527svn_error_t *
528svn_hash_keys(apr_array_header_t **array,
529              apr_hash_t *hash,
530              apr_pool_t *pool)
531{
532  apr_hash_index_t *hi;
533
534  *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
535
536  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
537    {
538      APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi);
539    }
540
541  return SVN_NO_ERROR;
542}
543
544
545svn_error_t *
546svn_hash_from_cstring_keys(apr_hash_t **hash_p,
547                           const apr_array_header_t *keys,
548                           apr_pool_t *pool)
549{
550  int i;
551  apr_hash_t *hash = svn_hash__make(pool);
552  for (i = 0; i < keys->nelts; i++)
553    {
554      const char *key =
555        apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
556      svn_hash_sets(hash, key, key);
557    }
558  *hash_p = hash;
559  return SVN_NO_ERROR;
560}
561
562
563
564/*** Specialized getter APIs ***/
565
566const char *
567svn_hash__get_cstring(apr_hash_t *hash,
568                      const char *key,
569                      const char *default_value)
570{
571  if (hash)
572    {
573      const char *value = svn_hash_gets(hash, key);
574      return value ? value : default_value;
575    }
576
577  return default_value;
578}
579
580
581svn_boolean_t
582svn_hash__get_bool(apr_hash_t *hash, const char *key,
583                   svn_boolean_t default_value)
584{
585  const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
586  svn_tristate_t value = svn_tristate__from_word(tmp_value);
587
588  if (value == svn_tristate_true)
589    return TRUE;
590  else if (value == svn_tristate_false)
591    return FALSE;
592
593  return default_value;
594}
595
596
597
598/*** Optimized hash function ***/
599
600/* apr_hashfunc_t optimized for the key that we use in SVN: paths and
601 * property names.  Its primary goal is speed for keys of known length.
602 *
603 * Since strings tend to spawn large value spaces (usually differ in many
604 * bits with differences spanning a larger section of the key), we can be
605 * quite sloppy extracting a hash value.  The more keys there are in a
606 * hash container, the more bits of the value returned by this function
607 * will be used.  For a small number of string keys, choosing bits from any
608 * any fix location close to the tail of those keys would usually be good
609 * enough to prevent high collision rates.
610 */
611static unsigned int
612hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
613{
614    unsigned int hash = 0;
615    const unsigned char *key = (const unsigned char *)char_key;
616    const unsigned char *p;
617    apr_ssize_t i;
618
619    if (*klen == APR_HASH_KEY_STRING)
620      *klen = strlen(char_key);
621
622#if SVN_UNALIGNED_ACCESS_IS_OK
623    for (p = key, i = *klen; i >= 4; i-=4, p+=4)
624      {
625        apr_uint32_t chunk = *(const apr_uint32_t *)p;
626
627        /* the ">> 17" part gives upper bits in the chunk a chance to make
628           some impact as well */
629        hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
630      }
631#else
632    for (p = key, i = *klen; i >= 4; i-=4, p+=4)
633      {
634        hash = hash * 33 * 33 * 33 * 33
635              + p[0] * 33 * 33 * 33
636              + p[1] * 33 * 33
637              + p[2] * 33
638              + p[3];
639      }
640#endif
641    for (; i; i--, p++)
642        hash = hash * 33 + *p;
643
644    return hash;
645}
646
647apr_hash_t *
648svn_hash__make(apr_pool_t *pool)
649{
650  return apr_hash_make_custom(pool, hashfunc_compatible);
651}
652