hash.c revision 251881
1/*
2 * hash.c :  dumping and reading hash tables to/from files.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <stdlib.h>
27#include <limits.h>
28
29#include <apr_version.h>
30#include <apr_pools.h>
31#include <apr_hash.h>
32#include <apr_file_io.h>
33
34#include "svn_types.h"
35#include "svn_string.h"
36#include "svn_error.h"
37#include "svn_hash.h"
38#include "svn_sorts.h"
39#include "svn_io.h"
40#include "svn_pools.h"
41
42#include "private/svn_dep_compat.h"
43#include "private/svn_subr_private.h"
44
45#include "svn_private_config.h"
46
47
48
49
50/*
51 * The format of a dumped hash table is:
52 *
53 *   K <nlength>
54 *   name (a string of <nlength> bytes, followed by a newline)
55 *   V <vlength>
56 *   val (a string of <vlength> bytes, followed by a newline)
57 *   [... etc, etc ...]
58 *   END
59 *
60 *
61 * (Yes, there is a newline after END.)
62 *
63 * For example:
64 *
65 *   K 5
66 *   color
67 *   V 3
68 *   red
69 *   K 11
70 *   wine review
71 *   V 376
72 *   A forthright entrance, yet coquettish on the tongue, its deceptively
73 *   fruity exterior hides the warm mahagony undercurrent that is the
74 *   hallmark of Chateau Fraisant-Pitre.  Connoisseurs of the region will
75 *   be pleased to note the familiar, subtle hints of mulberries and
76 *   carburator fluid.  Its confident finish is marred only by a barely
77 *   detectable suggestion of rancid squid ink.
78 *   K 5
79 *   price
80 *   V 8
81 *   US $6.50
82 *   END
83 *
84 */
85
86
87
88
89/*** Dumping and loading hash files. */
90
91/* Implements svn_hash_read2 and svn_hash_read_incremental. */
92static svn_error_t *
93hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
94          svn_boolean_t incremental, apr_pool_t *pool)
95{
96  svn_stringbuf_t *buf;
97  svn_boolean_t eof;
98  apr_size_t len, keylen, vallen;
99  char c, *keybuf, *valbuf;
100  apr_pool_t *iterpool = svn_pool_create(pool);
101
102  while (1)
103    {
104      svn_error_t *err;
105      apr_uint64_t ui64;
106
107      svn_pool_clear(iterpool);
108
109      /* Read a key length line.  Might be END, though. */
110      SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool));
111
112      /* Check for the end of the hash. */
113      if ((!terminator && eof && buf->len == 0)
114          || (terminator && (strcmp(buf->data, terminator) == 0)))
115        break;
116
117      /* Check for unexpected end of stream */
118      if (eof)
119        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
120                                _("Serialized hash missing terminator"));
121
122      if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
123        {
124          /* Get the length of the key */
125          err = svn_cstring_strtoui64(&ui64, buf->data + 2,
126                                      0, APR_SIZE_MAX, 10);
127          if (err)
128            return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
129                                    _("Serialized hash malformed"));
130          keylen = (apr_size_t)ui64;
131
132          /* Now read that much into a buffer. */
133          keybuf = apr_palloc(pool, keylen + 1);
134          SVN_ERR(svn_stream_read(stream, keybuf, &keylen));
135          keybuf[keylen] = '\0';
136
137          /* Suck up extra newline after key data */
138          len = 1;
139          SVN_ERR(svn_stream_read(stream, &c, &len));
140          if (c != '\n')
141            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
142                                    _("Serialized hash malformed"));
143
144          /* Read a val length line */
145          SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool));
146
147          if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
148            {
149              err = svn_cstring_strtoui64(&ui64, buf->data + 2,
150                                          0, APR_SIZE_MAX, 10);
151              if (err)
152                return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
153                                        _("Serialized hash malformed"));
154              vallen = (apr_size_t)ui64;
155
156              valbuf = apr_palloc(iterpool, vallen + 1);
157              SVN_ERR(svn_stream_read(stream, valbuf, &vallen));
158              valbuf[vallen] = '\0';
159
160              /* Suck up extra newline after val data */
161              len = 1;
162              SVN_ERR(svn_stream_read(stream, &c, &len));
163              if (c != '\n')
164                return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
165                                        _("Serialized hash malformed"));
166
167              /* Add a new hash entry. */
168              apr_hash_set(hash, keybuf, keylen,
169                           svn_string_ncreate(valbuf, vallen, pool));
170            }
171          else
172            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
173                                    _("Serialized hash malformed"));
174        }
175      else if (incremental && (buf->len >= 3)
176               && (buf->data[0] == 'D') && (buf->data[1] == ' '))
177        {
178          /* Get the length of the key */
179          err = svn_cstring_strtoui64(&ui64, buf->data + 2,
180                                      0, APR_SIZE_MAX, 10);
181          if (err)
182            return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
183                                    _("Serialized hash malformed"));
184          keylen = (apr_size_t)ui64;
185
186          /* Now read that much into a buffer. */
187          keybuf = apr_palloc(iterpool, keylen + 1);
188          SVN_ERR(svn_stream_read(stream, keybuf, &keylen));
189          keybuf[keylen] = '\0';
190
191          /* Suck up extra newline after key data */
192          len = 1;
193          SVN_ERR(svn_stream_read(stream, &c, &len));
194          if (c != '\n')
195            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
196                                    _("Serialized hash malformed"));
197
198          /* Remove this hash entry. */
199          apr_hash_set(hash, keybuf, keylen, NULL);
200        }
201      else
202        {
203          return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
204                                  _("Serialized hash malformed"));
205        }
206    }
207
208  svn_pool_destroy(iterpool);
209  return SVN_NO_ERROR;
210}
211
212
213/* Implements svn_hash_write2 and svn_hash_write_incremental. */
214static svn_error_t *
215hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
216           const char *terminator, apr_pool_t *pool)
217{
218  apr_pool_t *subpool;
219  apr_size_t len;
220  apr_array_header_t *list;
221  int i;
222
223  subpool = svn_pool_create(pool);
224
225  list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
226  for (i = 0; i < list->nelts; i++)
227    {
228      svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
229      svn_string_t *valstr = item->value;
230
231      svn_pool_clear(subpool);
232
233      /* Don't output entries equal to the ones in oldhash, if present. */
234      if (oldhash)
235        {
236          svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
237
238          if (oldstr && svn_string_compare(valstr, oldstr))
239            continue;
240        }
241
242      if (item->klen < 0)
243        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
244                                _("Cannot serialize negative length"));
245
246      /* Write it out. */
247      SVN_ERR(svn_stream_printf(stream, subpool,
248                                "K %" APR_SIZE_T_FMT "\n%s\n"
249                                "V %" APR_SIZE_T_FMT "\n",
250                                (apr_size_t) item->klen,
251                                (const char *) item->key,
252                                valstr->len));
253      len = valstr->len;
254      SVN_ERR(svn_stream_write(stream, valstr->data, &len));
255      SVN_ERR(svn_stream_puts(stream, "\n"));
256    }
257
258  if (oldhash)
259    {
260      /* Output a deletion entry for each property in oldhash but not hash. */
261      list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
262                            pool);
263      for (i = 0; i < list->nelts; i++)
264        {
265          svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
266
267          svn_pool_clear(subpool);
268
269          /* If it's not present in the new hash, write out a D entry. */
270          if (! apr_hash_get(hash, item->key, item->klen))
271            SVN_ERR(svn_stream_printf(stream, subpool,
272                                      "D %" APR_SSIZE_T_FMT "\n%s\n",
273                                      item->klen, (const char *) item->key));
274        }
275    }
276
277  if (terminator)
278    SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
279
280  svn_pool_destroy(subpool);
281  return SVN_NO_ERROR;
282}
283
284
285svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
286                            const char *terminator, apr_pool_t *pool)
287{
288  return hash_read(hash, stream, terminator, FALSE, pool);
289}
290
291
292svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
293                                       svn_stream_t *stream,
294                                       const char *terminator,
295                                       apr_pool_t *pool)
296{
297  return hash_read(hash, stream, terminator, TRUE, pool);
298}
299
300
301svn_error_t *
302svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
303                const char *terminator, apr_pool_t *pool)
304{
305  return hash_write(hash, NULL, stream, terminator, pool);
306}
307
308
309svn_error_t *
310svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
311                           svn_stream_t *stream, const char *terminator,
312                           apr_pool_t *pool)
313{
314  SVN_ERR_ASSERT(oldhash != NULL);
315  return hash_write(hash, oldhash, stream, terminator, pool);
316}
317
318
319svn_error_t *
320svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
321{
322  return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
323                    SVN_HASH_TERMINATOR, pool);
324}
325
326
327/* There are enough quirks in the deprecated svn_hash_read that we
328   should just preserve its implementation. */
329svn_error_t *
330svn_hash_read(apr_hash_t *hash,
331              apr_file_t *srcfile,
332              apr_pool_t *pool)
333{
334  svn_error_t *err;
335  char buf[SVN_KEYLINE_MAXLEN];
336  apr_size_t num_read;
337  char c;
338  int first_time = 1;
339
340
341  while (1)
342    {
343      /* Read a key length line.  Might be END, though. */
344      apr_size_t len = sizeof(buf);
345
346      err = svn_io_read_length_line(srcfile, buf, &len, pool);
347      if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
348        {
349          /* We got an EOF on our very first attempt to read, which
350             means it's a zero-byte file.  No problem, just go home. */
351          svn_error_clear(err);
352          return SVN_NO_ERROR;
353        }
354      else if (err)
355        /* Any other circumstance is a genuine error. */
356        return err;
357
358      first_time = 0;
359
360      if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
361          || ((len == 9)
362              && (buf[0] == 'P')
363              && (buf[1] == 'R')       /* We formerly used just "END" to */
364              && (buf[2] == 'O')       /* end a property hash, but later */
365              && (buf[3] == 'P')       /* we added "PROPS-END", so that  */
366              && (buf[4] == 'S')       /* the fs dump format would be    */
367              && (buf[5] == '-')       /* more human-readable.  That's   */
368              && (buf[6] == 'E')       /* why we accept either way here. */
369              && (buf[7] == 'N')
370              && (buf[8] == 'D')))
371        {
372          /* We've reached the end of the dumped hash table, so leave. */
373          return SVN_NO_ERROR;
374        }
375      else if ((buf[0] == 'K') && (buf[1] == ' '))
376        {
377          size_t keylen;
378          int parsed_len;
379          void *keybuf;
380
381          /* Get the length of the key */
382          SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
383          keylen = parsed_len;
384
385          /* Now read that much into a buffer, + 1 byte for null terminator */
386          keybuf = apr_palloc(pool, keylen + 1);
387          SVN_ERR(svn_io_file_read_full2(srcfile,
388                                         keybuf, keylen,
389                                         &num_read, NULL, pool));
390          ((char *) keybuf)[keylen] = '\0';
391
392          /* Suck up extra newline after key data */
393          SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
394          if (c != '\n')
395            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
396
397          /* Read a val length line */
398          len = sizeof(buf);
399          SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
400
401          if ((buf[0] == 'V') && (buf[1] == ' '))
402            {
403              svn_string_t *value = apr_palloc(pool, sizeof(*value));
404              apr_size_t vallen;
405              void *valbuf;
406
407              /* Get the length of the value */
408              SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
409              vallen = parsed_len;
410
411              /* Again, 1 extra byte for the null termination. */
412              valbuf = apr_palloc(pool, vallen + 1);
413              SVN_ERR(svn_io_file_read_full2(srcfile,
414                                             valbuf, vallen,
415                                             &num_read, NULL, pool));
416              ((char *) valbuf)[vallen] = '\0';
417
418              /* Suck up extra newline after val data */
419              SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
420              if (c != '\n')
421                return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
422
423              value->data = valbuf;
424              value->len = vallen;
425
426              /* The Grand Moment:  add a new hash entry! */
427              apr_hash_set(hash, keybuf, keylen, value);
428            }
429          else
430            {
431              return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
432            }
433        }
434      else
435        {
436          return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
437        }
438    } /* while (1) */
439}
440
441
442
443/*** Diffing hashes ***/
444
445svn_error_t *
446svn_hash_diff(apr_hash_t *hash_a,
447              apr_hash_t *hash_b,
448              svn_hash_diff_func_t diff_func,
449              void *diff_func_baton,
450              apr_pool_t *pool)
451{
452  apr_hash_index_t *hi;
453
454  if (hash_a)
455    for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
456      {
457        const void *key;
458        apr_ssize_t klen;
459
460        apr_hash_this(hi, &key, &klen, NULL);
461
462        if (hash_b && (apr_hash_get(hash_b, key, klen)))
463          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
464                               diff_func_baton));
465        else
466          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
467                               diff_func_baton));
468      }
469
470  if (hash_b)
471    for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
472      {
473        const void *key;
474        apr_ssize_t klen;
475
476        apr_hash_this(hi, &key, &klen, NULL);
477
478        if (! (hash_a && apr_hash_get(hash_a, key, klen)))
479          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
480                               diff_func_baton));
481      }
482
483  return SVN_NO_ERROR;
484}
485
486
487/*** Misc. hash APIs ***/
488
489svn_error_t *
490svn_hash_keys(apr_array_header_t **array,
491              apr_hash_t *hash,
492              apr_pool_t *pool)
493{
494  apr_hash_index_t *hi;
495
496  *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
497
498  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
499    {
500      APR_ARRAY_PUSH(*array, const char *) = svn__apr_hash_index_key(hi);
501    }
502
503  return SVN_NO_ERROR;
504}
505
506
507svn_error_t *
508svn_hash_from_cstring_keys(apr_hash_t **hash_p,
509                           const apr_array_header_t *keys,
510                           apr_pool_t *pool)
511{
512  int i;
513  apr_hash_t *hash = svn_hash__make(pool);
514  for (i = 0; i < keys->nelts; i++)
515    {
516      const char *key =
517        apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
518      svn_hash_sets(hash, key, key);
519    }
520  *hash_p = hash;
521  return SVN_NO_ERROR;
522}
523
524
525#if !APR_VERSION_AT_LEAST(1, 3, 0)
526void
527svn_hash__clear(apr_hash_t *hash)
528{
529  apr_hash_index_t *hi;
530  const void *key;
531  apr_ssize_t klen;
532
533  for (hi = apr_hash_first(NULL, hash); hi; hi = apr_hash_next(hi))
534    {
535      apr_hash_this(hi, &key, &klen, NULL);
536      apr_hash_set(hash, key, klen, NULL);
537    }
538}
539#endif
540
541
542
543/*** Specialized getter APIs ***/
544
545const char *
546svn_hash__get_cstring(apr_hash_t *hash,
547                      const char *key,
548                      const char *default_value)
549{
550  if (hash)
551    {
552      const char *value = svn_hash_gets(hash, key);
553      return value ? value : default_value;
554    }
555
556  return default_value;
557}
558
559
560svn_boolean_t
561svn_hash__get_bool(apr_hash_t *hash, const char *key,
562                   svn_boolean_t default_value)
563{
564  const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
565  svn_tristate_t value = svn_tristate__from_word(tmp_value);
566
567  if (value == svn_tristate_true)
568    return TRUE;
569  else if (value == svn_tristate_false)
570    return FALSE;
571
572  return default_value;
573}
574
575
576
577/*** Optimized hash function ***/
578
579/* Optimized version of apr_hashfunc_default in APR 1.4.5 and earlier.
580 * It assumes that the CPU has 32-bit multiplications with high throughput
581 * of at least 1 operation every 3 cycles. Latency is not an issue. Another
582 * optimization is a mildly unrolled main loop and breaking the dependency
583 * chain within the loop.
584 *
585 * Note that most CPUs including Intel Atom, VIA Nano, ARM feature the
586 * assumed pipelined multiplication circuitry. They can do one MUL every
587 * or every other cycle.
588 *
589 * The performance is ultimately limited by the fact that most CPUs can
590 * do only one LOAD and only one BRANCH operation per cycle. The best we
591 * can do is to process one character per cycle - provided the processor
592 * is wide enough to do 1 LOAD, COMPARE, BRANCH, MUL and ADD per cycle.
593 */
594static unsigned int
595hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
596{
597    unsigned int hash = 0;
598    const unsigned char *key = (const unsigned char *)char_key;
599    const unsigned char *p;
600    apr_ssize_t i;
601
602    if (*klen == APR_HASH_KEY_STRING)
603      {
604        for (p = key; ; p+=4)
605          {
606            unsigned int new_hash = hash * 33 * 33 * 33 * 33;
607            if (!p[0]) break;
608            new_hash += p[0] * 33 * 33 * 33;
609            if (!p[1]) break;
610            new_hash += p[1] * 33 * 33;
611            if (!p[2]) break;
612            new_hash += p[2] * 33;
613            if (!p[3]) break;
614            hash = new_hash + p[3];
615          }
616        for (; *p; p++)
617            hash = hash * 33 + *p;
618
619        *klen = p - key;
620      }
621    else
622      {
623        for (p = key, i = *klen; i >= 4; i-=4, p+=4)
624          {
625            hash = hash * 33 * 33 * 33 * 33
626                 + p[0] * 33 * 33 * 33
627                 + p[1] * 33 * 33
628                 + p[2] * 33
629                 + p[3];
630          }
631        for (; i; i--, p++)
632            hash = hash * 33 + *p;
633      }
634
635    return hash;
636}
637
638apr_hash_t *
639svn_hash__make(apr_pool_t *pool)
640{
641  return apr_hash_make_custom(pool, hashfunc_compatible);
642}
643