cache-membuffer.c revision 362181
1/*
2 * cache-membuffer.c: in-memory caching for Subversion
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24#include <assert.h>
25#include <apr_md5.h>
26#include <apr_thread_rwlock.h>
27
28#include "svn_pools.h"
29#include "svn_checksum.h"
30#include "svn_private_config.h"
31#include "svn_hash.h"
32#include "svn_string.h"
33#include "svn_sorts.h"  /* get the MIN macro */
34
35#include "private/svn_atomic.h"
36#include "private/svn_dep_compat.h"
37#include "private/svn_mutex.h"
38#include "private/svn_subr_private.h"
39#include "private/svn_string_private.h"
40
41#include "cache.h"
42#include "fnv1a.h"
43
44/*
45 * This svn_cache__t implementation actually consists of two parts:
46 * a shared (per-process) singleton membuffer cache instance and shallow
47 * svn_cache__t front-end instances that each use different key spaces.
48 * For data management, they all forward to the singleton membuffer cache.
49 *
50 * A membuffer cache consists of two parts:
51 *
52 * 1. A linear data buffer containing cached items in a serialized
53 *    representation, prefixed by their full cache keys. There may be
54 *    arbitrary gaps between entries.  This buffer is sub-divided into
55 *    (currently two) cache levels.
56 *
57 * 2. A directory of cache entries. This is organized similar to CPU
58 *    data caches: for every possible key, there is exactly one group
59 *    of entries that may contain the header info for an item with
60 *    that given key.  The result is a GROUP_SIZE+-way associative cache
61 *    whose associativity can be dynamically increased.
62 *
63 * Only the start addresses of these two data parts are given as native
64 * pointers. All other references are expressed as offsets to these pointers.
65 * With that design, it is relatively easy to share the same data structure
66 * between different processes and / or to persist them on disk. These
67 * out-of-process features have not been implemented, yet.
68 *
69 * Superficially, cache levels are being used as usual: insertion happens
70 * into L1 and evictions will promote items to L2.  But their whole point
71 * is a different one.  L1 uses a circular buffer, i.e. we have perfect
72 * caching for the last N bytes where N is the size of L1.  L2 uses a more
73 * elaborate scheme based on priorities and hit counts as described below.
74 *
75 * The data buffer usage information is implicitly given by the directory
76 * entries. Every USED entry has a reference to the previous and the next
77 * used dictionary entry and this double-linked list is ordered by the
78 * offsets of their item data within the data buffer. So removing data,
79 * for instance, is done simply by unlinking it from the chain, implicitly
80 * marking the entry as well as the data buffer section previously
81 * associated to it as unused.  First and last element of that chain are
82 * being referenced from the respective cache level.
83 *
84 * Insertion can occur at only one, sliding position per cache level.  It is
85 * marked by its offset in the data buffer and the index of the first used
86 * entry at or behind that position.  If this gap is too small to accommodate
87 * the new item (plus its full key), the insertion window is extended as
88 * described below.  The new entry will always be inserted at the bottom end
89 * of the window and since the next used entry is known, properly sorted
90 * insertion is possible.
91 *
92 * To make the cache perform robustly in a wide range of usage scenarios,
93 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
94 * details). Every item holds a read hit counter and there is a global read
95 * hit counter. The more hits an entry has in relation to the average, the
96 * more likely it is to be kept, using a rand()-based condition. The test is
97 * applied only to the entry following the insertion window. If it doesn't
98 * get evicted, it is moved to the beginning of that window and the window is
99 * moved.
100 *
101 * Moreover, the entry's hits get halved to make that entry more likely to
102 * be removed the next time the sliding insertion / removal window comes by.
103 * As a result, frequently used entries are likely not to be dropped until
104 * they have not been used for a while. Also, even in a cache thrashing
105 * situation, about 50% of the content survives each time 50% of the cache is re-written
106 * with new entries. For details on the fine-tuning involved, see the
107 * comments in ensure_data_insertable_l2().
108 *
109 * Due to the randomized mapping of keys to entry groups, some groups may
110 * overflow.  In that case, there are spare groups that can be chained to
111 * an already used group to extend it.
112 *
113 * To limit the entry size and management overhead, not the actual item keys
114 * but only their hashed "fingerprint" will be stored.  These are reasonably
115 * unique to prevent collisions, so we only need to support up to one entry
116 * per entry key.  To guarantee that there are no conflicts, however, we
117 * store the actual full key immediately in front of the serialized item
118 * data.  That is, the entry offset actually points to the full key and the
119 * key length stored in the entry acts as an additional offset to find the
120 * actual item.
121 *
122 * Most keys are 16 bytes or less.  We use the prefix indexes returned by
123 * a prefix_pool_t instance to uniquely identify the prefix in that case.
124 * Then the combination of prefix index and key stored in the fingerprint
125 * is unique, too, and can never conflict.  No full key construction,
126 * storage and comparison is needed in that case.
127 *
128 * All access to the cached data needs to be serialized. Because we want
129 * to scale well despite that bottleneck, we simply segment the cache into
130 * a number of independent caches (segments). Items will be multiplexed based
131 * on their hash key.
132 */
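
/* Illustrative sketch only (not part of the implementation): how a lookup
 * conceptually maps onto the structures described above.  The segment is
 * picked from the key fingerprint, the group within that segment is probed
 * for a matching entry, and the entry's OFFSET locates the full key plus
 * serialized item in the DATA buffer.  The caller must hold the segment's
 * read lock; see find_entry() further down.
 */
#if 0
static svn_boolean_t
sketch_lookup(svn_membuffer_t *segments, const full_key_t *key)
{
  svn_membuffer_t *segment = segments;  /* redirected to the right segment */
  apr_uint32_t group_index = get_group_index(&segment, &key->entry_key);
  entry_t *entry = find_entry(segment, group_index, key, FALSE);

  /* ENTRY->OFFSET points to the stored full key copy; the item data
   * follows after ENTRY->KEY.KEY_LEN bytes. */
  return entry != NULL;
}
#endif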
133
134/* APR's read-write lock implementation on Windows is horribly inefficient.
135 * Even with very low contention, a runtime overhead of 35 percent has been
136 * measured for 'svn-bench null-export' over ra_serf.
137 *
138 * Use a simple mutex on Windows.  Because there is one mutex per segment,
139 * large machines should (and usually can) be configured with large caches
140 * such that read contention is kept low.  This is basically the situation
141 * we had before 1.8.
142 */
143#ifdef WIN32
144#  define USE_SIMPLE_MUTEX 1
145#else
146#  define USE_SIMPLE_MUTEX 0
147#endif
148
149/* For more efficient copy operations, let's align all data items properly.
150 * Since we can't portably align pointers, this is rather the item size
151 * granularity which ensures *relative* alignment within the cache - still
152 * giving us decent copy speeds on most machines.
153 *
154 * Must be a power of 2.
155 */
156#define ITEM_ALIGNMENT 16
157
158/* By default, don't create cache segments smaller than this value unless
159 * the total cache size itself is smaller.
160 */
161#define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
162
163/* The minimum segment size we will allow for multi-segmented caches
164 */
165#define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
166
167/* The maximum number of segments allowed. Larger numbers reduce the size
168 * of each segment, in turn reducing the max size of a cacheable item.
169 * Also, each segment gets its own lock object. The actual number supported
170 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
171 * may return an error.
172 */
173#define MAX_SEGMENT_COUNT 0x10000
174
175/* As of today, APR won't allocate chunks of 4GB or more. So, limit the
176 * segment size to slightly below that.
177 */
178#define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
179
180/* We don't mark the initialization status for every group but initialize
181 * a number of groups at once. That will allow for a very small init flags
182 * vector that is likely to fit into the CPU caches even for fairly large
183 * membuffer caches. For instance, the default of 32 means 8x32 groups per
184 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
185 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
186 */
187#define GROUP_INIT_GRANULARITY 32
188
189/* Invalid index reference value. Equivalent to (apr_uint32_t)(-1)
190 */
191#define NO_INDEX APR_UINT32_MAX
192
193/* To save space in our group structure, we only use 32 bit size values
194 * and, therefore, limit the size of each entry to just below 4GB.
195 * Supporting larger items is not a good idea as the data transfer
196 * to and from the cache would block other threads for a very long time.
197 */
198#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
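
/* A quick worked example of the limit above: with ITEM_ALIGNMENT 16,
 * MAX_ITEM_SIZE evaluates to (apr_uint32_t)(0 - 16) == 0xfffffff0, i.e.
 * 16 bytes short of 4GB.
 */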
199
200/* We use this structure to identify cache entries. There cannot be two
201 * entries with the same entry key. However unlikely, though, two different
202 * full keys (see full_key_t) may have the same entry key.  That is a
203 * collision and at most one of them can be stored in the cache at any time.
204 *
205 * If the prefix is shared, which implies that the variable key part is no
206 * longer than 16 bytes, then there is a 1:1 mapping between full key and
207 * entry key.
208 */
209typedef struct entry_key_t
210{
211  /* 16 byte fingerprint of the full key. */
212  apr_uint64_t fingerprint[2];
213
214  /* Length of the full key.  This value is aligned to ITEM_ALIGNMENT to
215   * make sure the subsequent item content is properly aligned.  If 0,
216   * PREFIX_IDX is implied to be != NO_INDEX. */
217  apr_size_t key_len;
218
219  /* Unique index of the shared key prefix, i.e. its index within the
220   * prefix pool (see prefix_pool_t).  NO_INDEX if the key prefix is not
221   * shared, otherwise KEY_LEN==0 is implied. */
222  apr_uint32_t prefix_idx;
223} entry_key_t;
224
225/* A full key, i.e. the combination of the cache's key prefix with some
226 * dynamic part appended to it.  It also contains its ENTRY_KEY.
227 *
228 * If the ENTRY_KEY has a 1:1 mapping to the FULL_KEY, then the latter
229 * will be empty and remains unused.
230 */
231typedef struct full_key_t
232{
233  /* Reduced form identifying the cache entry (if such an entry exists). */
234  entry_key_t entry_key;
235
236  /* If ENTRY_KEY is not a 1:1 mapping of the prefix + dynamic key
237   * combination,  then this contains the full combination.  Note that the
238   * SIZE element may be larger than ENTRY_KEY.KEY_LEN, but only the latter
239   * determines the valid key size. */
240  svn_membuf_t full_key;
241} full_key_t;
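
/* Example with illustrative values: for a cache whose prefix got pooled at
 * index 3, a 10-byte dynamic key yields an entry_key_t with prefix_idx == 3,
 * key_len == 0 and a fingerprint that uniquely identifies the combination;
 * the FULL_KEY buffer stays empty.  A dynamic key longer than 16 bytes
 * instead yields prefix_idx == NO_INDEX, a key_len rounded up to a multiple
 * of ITEM_ALIGNMENT and the complete prefix + key combination kept in
 * FULL_KEY (and later stored in front of the item data).
 */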
242
243/* A limited capacity, thread-safe pool of unique C strings.  Operations on
244 * this data structure are defined by prefix_pool_* functions.  The only
245 * "public" member is VALUES (r/o access only).
246 */
247typedef struct prefix_pool_t
248{
249  /* Map C string to a pointer into VALUES with the same contents. */
250  apr_hash_t *map;
251
252  /* Pointer to an array of strings. These are the contents of this pool
253   * and each one of them is referenced by MAP.  Valid indexes are 0 to
254   * VALUES_USED - 1.  May be NULL if VALUES_MAX is 0. */
255  const char **values;
256
257  /* Maximum number of entries that VALUES may have. */
258  apr_uint32_t values_max;
259
260  /* Number of used entries in VALUES.  Never exceeds VALUES_MAX. */
261  apr_uint32_t values_used;
262
263  /* Maximum number of bytes to allocate. */
264  apr_size_t bytes_max;
265
266  /* Number of bytes currently allocated.  Should not exceed BYTES_MAX but
267   * the implementation may slightly exceed it. */
268  apr_size_t bytes_used;
269
270  /* The serialization object. */
271  svn_mutex__t *mutex;
272} prefix_pool_t;
273
274/* Set *PREFIX_POOL to a new instance that tries to limit allocation to
275 * BYTES_MAX bytes.  If MUTEX_REQUIRED is set and multi-threading is
276 * supported, serialize all access to the new instance.  Allocate the
277 * object from *RESULT_POOL. */
278static svn_error_t *
279prefix_pool_create(prefix_pool_t **prefix_pool,
280                   apr_size_t bytes_max,
281                   svn_boolean_t mutex_required,
282                   apr_pool_t *result_pool)
283{
284  enum
285    {
286      /* With 56 bytes of overhead under 64 bits, we will probably never get
287       * substantially below this.  If we accidentally do, we will simply
288       * run out of entries in the VALUES array before running out of
289       * allocated memory. */
290      ESTIMATED_BYTES_PER_ENTRY = 120
291    };
292
293  /* Number of entries we are going to support. */
294  apr_size_t capacity = MIN(APR_UINT32_MAX,
295                            bytes_max / ESTIMATED_BYTES_PER_ENTRY);
296
297  /* Construct the result struct. */
298  prefix_pool_t *result = apr_pcalloc(result_pool, sizeof(*result));
299  result->map = svn_hash__make(result_pool);
300
301  result->values = capacity
302                 ? apr_pcalloc(result_pool, capacity * sizeof(const char *))
303                 : NULL;
304  result->values_max = (apr_uint32_t)capacity;
305  result->values_used = 0;
306
307  result->bytes_max = bytes_max;
308  result->bytes_used = capacity * sizeof(svn_membuf_t);
309
310  SVN_ERR(svn_mutex__init(&result->mutex, mutex_required, result_pool));
311
312  /* Done. */
313  *prefix_pool = result;
314  return SVN_NO_ERROR;
315}
316
317/* Set *PREFIX_IDX to the offset in PREFIX_POOL->VALUES that contains the
318 * value PREFIX.  If none exists, auto-insert it.  If we can't due to
319 * capacity exhaustion, set *PREFIX_IDX to NO_INDEX.
320 * To be called by prefix_pool_get() only. */
321static svn_error_t *
322prefix_pool_get_internal(apr_uint32_t *prefix_idx,
323                         prefix_pool_t *prefix_pool,
324                         const char *prefix)
325{
326  enum
327    {
328      /* Size of a hash entry plus (max.) APR alignment loss.
329       *
330       * This may be slightly off if e.g. APR changes its internal data
331       * structures but that will translate into just a few percent (~10%) of
332       * over-allocation.  Memory consumption will still be capped.
333       */
334      OVERHEAD = 40 + 8
335    };
336
337  const char **value;
338  apr_size_t prefix_len = strlen(prefix);
339  apr_size_t bytes_needed;
340  apr_pool_t *pool;
341
342  /* Lookup.  If we already know that prefix, return its index. */
343  value = apr_hash_get(prefix_pool->map, prefix, prefix_len);
344  if (value != NULL)
345    {
346      const apr_size_t idx = value - prefix_pool->values;
347      SVN_ERR_ASSERT(idx < prefix_pool->values_used);
348      *prefix_idx = (apr_uint32_t) idx;
349      return SVN_NO_ERROR;
350    }
351
352  /* Capacity checks. */
353  if (prefix_pool->values_used == prefix_pool->values_max)
354    {
355      *prefix_idx = NO_INDEX;
356      return SVN_NO_ERROR;
357    }
358
359  bytes_needed = prefix_len + 1 + OVERHEAD;
360  assert(prefix_pool->bytes_max >= prefix_pool->bytes_used);
361  if (prefix_pool->bytes_max - prefix_pool->bytes_used < bytes_needed)
362    {
363      *prefix_idx = NO_INDEX;
364      return SVN_NO_ERROR;
365    }
366
367  /* Add new entry. */
368  pool = apr_hash_pool_get(prefix_pool->map);
369
370  value = &prefix_pool->values[prefix_pool->values_used];
371  *value = apr_pstrndup(pool, prefix, prefix_len + 1);
372  apr_hash_set(prefix_pool->map, *value, prefix_len, value);
373
374  *prefix_idx = prefix_pool->values_used;
375  ++prefix_pool->values_used;
376  prefix_pool->bytes_used += bytes_needed;
377
378  return SVN_NO_ERROR;
379}
380
381/* Thread-safe wrapper around prefix_pool_get_internal. */
382static svn_error_t *
383prefix_pool_get(apr_uint32_t *prefix_idx,
384                prefix_pool_t *prefix_pool,
385                const char *prefix)
386{
387  SVN_MUTEX__WITH_LOCK(prefix_pool->mutex,
388                       prefix_pool_get_internal(prefix_idx, prefix_pool,
389                                                prefix));
390
391  return SVN_NO_ERROR;
392}
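
/* Illustrative usage sketch (not part of the implementation): the pool
 * hands out a stable index per distinct prefix and falls back to NO_INDEX
 * once its capacity is exhausted.  The prefix string below is made up.
 */
#if 0
static svn_error_t *
sketch_prefix_pool_usage(apr_pool_t *pool)
{
  prefix_pool_t *prefix_pool;
  apr_uint32_t idx1, idx2;

  SVN_ERR(prefix_pool_create(&prefix_pool, 0x10000 /* bytes_max */,
                             TRUE /* mutex_required */, pool));

  /* Equal prefixes map to equal indexes. */
  SVN_ERR(prefix_pool_get(&idx1, prefix_pool, "fsfs:/repos/db/revs:"));
  SVN_ERR(prefix_pool_get(&idx2, prefix_pool, "fsfs:/repos/db/revs:"));
  assert(idx1 == idx2);

  return SVN_NO_ERROR;
}
#endif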
393
394/* Debugging / corruption detection support.
395 * If you define this macro, the getter functions will perform expensive
396 * checks on the item data, requested keys and entry types. If there is
397 * a mismatch found in any of them when being compared with the values
398 * remembered in the setter function, an error will be returned.
399 */
400#ifdef SVN_DEBUG_CACHE_MEMBUFFER
401
402/* The prefix passed to svn_cache__create_membuffer_cache() effectively
403 * defines the type of all items stored by that cache instance. We'll take
404 * the last 15 bytes + \0 as plaintext for easy identification by the dev.
405 */
406#define PREFIX_TAIL_LEN 16
407
408/* This record will be attached to any cache entry. It tracks item data
409 * (content), key and type as hash values and is the baseline against which
410 * the getters will compare their results to detect inconsistencies.
411 */
412typedef struct entry_tag_t
413{
414  /* MD5 checksum over the serialized item data.
415   */
416  unsigned char content_hash[APR_MD5_DIGESTSIZE];
417
418  /* Hash value of the svn_cache_t instance that wrote the item
419   * (i.e. a combination of type and repository)
420   */
421  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];
422
423  /* Note that this only covers the variable part of the key,
424   * i.e. it will be different from the full key hash used for
425   * cache indexing.
426   */
427  unsigned char key_hash[APR_MD5_DIGESTSIZE];
428
429  /* Last letters of the key in human-readable format
430   * (ends with the type identifier, e.g. "DAG")
431   */
432  char prefix_tail[PREFIX_TAIL_LEN];
433
434  /* Length of the variable key part.
435   */
436  apr_size_t key_len;
437
438} entry_tag_t;
439
440/* Initialize all members of TAG except for the content hash.
441 */
442static svn_error_t *store_key_part(entry_tag_t *tag,
443                                   const char *prefix,
444                                   const void *key,
445                                   apr_size_t key_len,
446                                   apr_pool_t *scratch_pool)
447{
448  svn_checksum_t *checksum;
449  apr_size_t prefix_len = strlen(prefix);
450
451  if (prefix_len >= sizeof(tag->prefix_tail))
452    {
453      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
454      prefix_len = sizeof(tag->prefix_tail) - 1;
455    }
456
457  SVN_ERR(svn_checksum(&checksum,
458                       svn_checksum_md5,
459                       prefix,
460                       strlen(prefix),
461                       scratch_pool));
462  memcpy(tag->prefix_hash, checksum->digest, sizeof(tag->prefix_hash));
463
464  SVN_ERR(svn_checksum(&checksum,
465                       svn_checksum_md5,
466                       key,
467                       key_len,
468                       scratch_pool));
469  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));
470
471  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
472  memcpy(tag->prefix_tail, prefix, prefix_len + 1);
473
474  tag->key_len = key_len;
475
476  return SVN_NO_ERROR;
477}
478
479/* Initialize the content hash member of TAG.
480 */
481static svn_error_t* store_content_part(entry_tag_t *tag,
482                                       const void *data,
483                                       apr_size_t size,
484                                       apr_pool_t *pool)
485{
486  svn_checksum_t *checksum;
487  SVN_ERR(svn_checksum(&checksum,
488                       svn_checksum_md5,
489                       data,
490                       size,
491                       pool));
492
493  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));
494
495  return SVN_NO_ERROR;
496}
497
498/* Compare two tags and fail with an assertion upon differences.
499 */
500static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
501                                      const entry_tag_t *rhs)
502{
503  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
504                        sizeof(lhs->content_hash)) == 0);
505  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
506                        sizeof(lhs->prefix_hash)) == 0);
507  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
508                        sizeof(lhs->key_hash)) == 0);
509  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
510                        sizeof(lhs->prefix_tail)) == 0);
511
512  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);
513
514  return SVN_NO_ERROR;
515}
516
517/* Recurring code snippets.
518 */
519
520#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,
521
522#define DEBUG_CACHE_MEMBUFFER_TAG tag,
523
524#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
525  entry_tag_t _tag;                                              \
526  entry_tag_t *tag = &_tag;                                      \
527  if (key)                                                       \
528    SVN_ERR(store_key_part(tag,                                  \
529                           get_prefix_key(cache),                \
530                           key,                                  \
531                           cache->key_len == APR_HASH_KEY_STRING \
532                               ? strlen((const char *) key)      \
533                               : cache->key_len,                 \
534                           pool));
535
536#else
537
538/* Don't generate any checks if consistency checks have not been enabled.
539 */
540#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
541#define DEBUG_CACHE_MEMBUFFER_TAG
542#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)
543
544#endif /* SVN_DEBUG_CACHE_MEMBUFFER */
545
546/* A single dictionary entry. Since all entries will be allocated once
547 * during cache creation, those entries might be either used or unused.
548 * An entry is used if and only if it is contained in the doubly-linked
549 * list of used entries per cache level.
550 */
551typedef struct entry_t
552{
553  /* Identifying the data item. Only valid for used entries.
554   */
555  entry_key_t key;
556
557  /* The offset of the cached item's serialized data within the cache's
558   * DATA buffer.
559   */
560  apr_uint64_t offset;
561
562  /* Size of the serialized item data. May be 0.  The MAX_ITEM_SIZE macro
563   * above ensures that there will be no overflows.
564   * Only valid for used entries.
565   */
566  apr_size_t size;
567
568  /* Number of (read) hits for this entry. Will be reset upon write.
569   * Only valid for used entries.
570   */
571  svn_atomic_t hit_count;
572
573  /* Reference to the next used entry in the order defined by offset.
574   * NO_INDEX indicates the end of the list; this entry must be referenced
575   * by the cache's cache_level_t.last member.  NO_INDEX also implies that
576   * the data buffer is not used beyond offset+size.
577   * Only valid for used entries.
578   */
579  apr_uint32_t next;
580
581  /* Reference to the previous used entry in the order defined by offset.
582   * NO_INDEX indicates the end of the list; this entry must be referenced
583   * by the cache's cache_level_t.first member.
584   * Only valid for used entries.
585   */
586  apr_uint32_t previous;
587
588  /* Priority of this entry.  This entry will not be replaced by lower-
589   * priority items.
590   */
591  apr_uint32_t priority;
592#ifdef SVN_DEBUG_CACHE_MEMBUFFER
593  /* Remember type, content and key hashes.
594   */
595  entry_tag_t tag;
596#endif
597} entry_t;
598
599/* Group header struct.
600 */
601typedef struct group_header_t
602{
603  /* number of entries used [0 .. USED-1] */
604  apr_uint32_t used;
605
606  /* next group in the chain or NO_INDEX for the last.
607   * For recyclable unused spare groups, this points to the next
608   * unused spare group */
609  apr_uint32_t next;
610
611  /* previous group in the chain or NO_INDEX for the first */
612  apr_uint32_t previous;
613
614  /* number of elements in the chain from start to here.
615   * >= 1 for used groups, 0 for unused spare groups */
616  apr_uint32_t chain_length;
617
618} group_header_t;
619
620/* The size of the group struct should be a power of two to make sure it does
621 * not cross memory page boundaries.  Since we already access the cache
622 * randomly, having two page table lookups instead of one is bad.
623 */
624#define GROUP_BLOCK_SIZE 512
625
626/* A ~10-way associative cache seems to be a good compromise between
627 * performance (worst-case lookups) and efficiency-loss due to collisions.
628 *
629 * This value may be changed to any positive integer.
630 */
631#define GROUP_SIZE \
632          ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
633
634/* Maximum number of groups in a chain, i.e. a cache index group can hold
635 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
636 */
637#define MAX_GROUP_CHAIN_LENGTH 8
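
/* A worked example of the sizing above, assuming a 64-bit build without
 * the debug tag where sizeof(group_header_t) == 16 and sizeof(entry_t) == 64:
 * GROUP_SIZE becomes (512 - 16) / 64 == 7, so a fully chained group can hold
 * up to 7 * MAX_GROUP_CHAIN_LENGTH == 56 entries.  The exact figures depend
 * on the platform's struct layout.
 */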
638
639/* We group dictionary entries to make this GROUP_SIZE-way associative.
640 */
641typedef struct entry_group_t
642{
643  /* group globals */
644  group_header_t header;
645
646  /* padding and also room for future extensions */
647  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
648               - sizeof(entry_t) * GROUP_SIZE];
649
650  /* the actual entries */
651  entry_t entries[GROUP_SIZE];
652
653} entry_group_t;
654
655/* Per-cache level header structure.  Instances of this are members of
656 * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
657 * All offset values are global / absolute to that whole buffer.
658 */
659typedef struct cache_level_t
660{
661  /* Reference to the first (defined by the order of content in the data
662   * buffer) dictionary entry used by any data item.
663   * NO_INDEX for an empty cache.
664   */
665  apr_uint32_t first;
666
667  /* Reference to the last (defined by the order of content in the data
668   * buffer) dictionary entry used by any data item.
669   * NO_INDEX for an empty cache.
670   */
671  apr_uint32_t last;
672
673  /* Reference to the first (defined by the order of content in the data
674   * buffer) used dictionary entry behind the insertion position
675   * (current_data). If NO_INDEX, the data buffer is free starting at the
676   * current_data offset.
677   */
678  apr_uint32_t next;
679
680
681  /* First offset in the cache's DATA buffer that belongs to this level.
682   */
683  apr_uint64_t start_offset;
684
685  /* Size of data buffer allocated to this level in bytes. Must be > 0.
686   */
687  apr_uint64_t size;
688
689  /* Offset in the data buffer where the next insertion shall occur.
690   */
691  apr_uint64_t current_data;
692
693} cache_level_t;
694
695/* The cache header structure.
696 */
697struct svn_membuffer_t
698{
699  /* Number of cache segments. Must be a power of 2.
700     Please note that this structure represents only one such segment
701     and that all segments must / will report the same values here. */
702  apr_uint32_t segment_count;
703
704  /* Collection of prefixes shared among all instances accessing the
705   * same membuffer cache backend.  If a prefix is contained in this
706   * pool then all cache instances using an equal prefix must actually
707   * use the one stored in this pool. */
708  prefix_pool_t *prefix_pool;
709
710  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
711   * entries long.  Never NULL.
712   */
713  entry_group_t *directory;
714
715  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
716   * Allows for efficiently marking groups as "not initialized".
717   */
718  unsigned char *group_initialized;
719
720  /* Size of dictionary in groups. Must be > 0.
721   */
722  apr_uint32_t group_count;
723
724  /* Total number of spare groups.
725   */
726  apr_uint32_t spare_group_count;
727
728  /* First recyclable spare group.
729   */
730  apr_uint32_t first_spare_group;
731
732  /* Maximum number of spare groups ever used.  I.e. group index
733   * group_count + max_spare_used is the first unused spare group
734   * if first_spare_group is NO_INDEX.
735   */
736  apr_uint32_t max_spare_used;
737
738  /* Pointer to the data buffer, data_size bytes long. Never NULL.
739   */
740  unsigned char *data;
741
742  /* Total number of data buffer bytes in use.
743   */
744  apr_uint64_t data_used;
745
746  /* Largest entry size that we would accept.  For total cache sizes
747   * less than 4TB (sic!), this is determined by the total cache size.
748   */
749  apr_uint64_t max_entry_size;
750
751  /* The cache levels, organized as sub-buffers.  Since entries in the
752   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
753   * not need to know the cache level of a specific item.  Cache levels
754   * are only used to implement a hybrid insertion / eviction strategy.
755   */
756
757  /* First cache level, i.e. most insertions happen here.  Very large
758   * items might get inserted directly into L2.  L1 is a strict FIFO
759   * ring buffer that does not care about item priorities.  All evicted
760   * items get a chance to be promoted to L2.
761   */
762  cache_level_t l1;
763
764  /* Second cache level, i.e. data evicted from L1 will be added here
765   * if the item is "important" enough or the L2 insertion window is large
766   * enough.
767   */
768  cache_level_t l2;
769
770
771  /* Number of used dictionary entries, i.e. number of cached items.
772   * Purely statistical information that may be used for profiling only.
773   * Updates are not synchronized and values may be nonsensical on some
774   * platforms.
775   */
776  apr_uint32_t used_entries;
777
778  /* Total number of calls to membuffer_cache_get.
779   * Purely statistical information that may be used for profiling only.
780   * Updates are not synchronized and values may be nonsensical on some
781   * platforms.
782   */
783  apr_uint64_t total_reads;
784
785  /* Total number of calls to membuffer_cache_set.
786   * Purely statistical information that may be used for profiling only.
787   * Updates are not synchronized and values may be nonsensical on some
788   * platforms.
789   */
790  apr_uint64_t total_writes;
791
792  /* Total number of hits since the cache's creation.
793   * Purely statistical information that may be used for profiling only.
794   * Updates are not synchronized and values may be nonsensical on some
795   * platforms.
796   */
797  apr_uint64_t total_hits;
798
799#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
800  /* A lock for intra-process synchronization to the cache, or NULL if
801   * the cache's creator doesn't feel the cache needs to be
802   * thread-safe.
803   */
804  svn_mutex__t *lock;
805#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
806  /* Same for read-write lock. */
807  apr_thread_rwlock_t *lock;
808
809  /* If set, write accesses will wait until they get exclusive access.
810   * Otherwise, they will become no-ops if the segment is currently
811   * read-locked.  Only used when LOCK is an r/w lock.
812   */
813  svn_boolean_t allow_blocking_writes;
814#endif
815
816  /* A write lock counter, must be either 0 or 1.
817   * This one is only used in debug assertions to verify that you used
818   * the correct multi-threading settings. */
819  svn_atomic_t write_lock_count;
820};
821
822/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
823 */
824#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
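
/* Sanity check of the alignment arithmetic with ITEM_ALIGNMENT == 16:
 * ALIGN_VALUE(0) == 0, ALIGN_VALUE(1) == 16, ALIGN_VALUE(16) == 16 and
 * ALIGN_VALUE(17) == 32, i.e. values get rounded up to the next multiple
 * of ITEM_ALIGNMENT.
 */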
825
826/* If locking is supported for CACHE, acquire a read lock for it.
827 */
828static svn_error_t *
829read_lock_cache(svn_membuffer_t *cache)
830{
831#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
832  return svn_mutex__lock(cache->lock);
833#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
834  if (cache->lock)
835  {
836    apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
837    if (status)
838      return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
839  }
840
841  return SVN_NO_ERROR;
842#else
843  return SVN_NO_ERROR;
844#endif
845}
846
847/* If locking is supported for CACHE, acquire a write lock for it.
848 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
849 * leave it untouched otherwise.
850 */
851static svn_error_t *
852write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
853{
854#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
855  return svn_mutex__lock(cache->lock);
856#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
857  if (cache->lock)
858    {
859      apr_status_t status;
860      if (cache->allow_blocking_writes)
861        {
862          status = apr_thread_rwlock_wrlock(cache->lock);
863        }
864      else
865        {
866          status = apr_thread_rwlock_trywrlock(cache->lock);
867          if (SVN_LOCK_IS_BUSY(status))
868            {
869              *success = FALSE;
870              status = APR_SUCCESS;
871            }
872        }
873
874      if (status)
875        return svn_error_wrap_apr(status,
876                                  _("Can't write-lock cache mutex"));
877    }
878
879  return SVN_NO_ERROR;
880#else
881  return SVN_NO_ERROR;
882#endif
883}
884
885/* If locking is supported for CACHE, acquire an unconditional write lock
886 * for it.
887 */
888static svn_error_t *
889force_write_lock_cache(svn_membuffer_t *cache)
890{
891#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
892  return svn_mutex__lock(cache->lock);
893#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
894  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
895  if (status)
896    return svn_error_wrap_apr(status,
897                              _("Can't write-lock cache mutex"));
898
899  return SVN_NO_ERROR;
900#else
901  return SVN_NO_ERROR;
902#endif
903}
904
905/* If locking is supported for CACHE, release the current lock
906 * (read or write).  Return ERR, or the error from unlocking if ERR is clear.
907 */
908static svn_error_t *
909unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
910{
911#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
912  return svn_mutex__unlock(cache->lock, err);
913#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
914  if (cache->lock)
915  {
916    apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
917    if (err)
918      return err;
919
920    if (status)
921      return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
922  }
923
924  return err;
925#else
926  return err;
927#endif
928}
929
930/* If supported, guard the execution of EXPR with a read lock to CACHE.
931 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
932 */
933#define WITH_READ_LOCK(cache, expr)         \
934do {                                        \
935  SVN_ERR(read_lock_cache(cache));          \
936  SVN_ERR(unlock_cache(cache, (expr)));     \
937} while (0)
938
939/* If supported, guard the execution of EXPR with a write lock to CACHE.
940 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
941 *
942 * The write lock process is complicated if we don't allow waiting for
943 * the lock: If we didn't get the lock, we may still need to remove an
944 * existing entry for the given key because that content is now stale.
945 * Once we discover such an entry, we unconditionally do a blocking
946 * wait for the write lock.  In case no old content could be found, a
947 * failing lock attempt is simply a no-op and we exit the macro.
948 */
949#define WITH_WRITE_LOCK(cache, expr)                            \
950do {                                                            \
951  svn_boolean_t got_lock = TRUE;                                \
952  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
953  if (!got_lock)                                                \
954    {                                                           \
955      svn_boolean_t exists;                                     \
956      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
957      if (exists)                                               \
958        SVN_ERR(force_write_lock_cache(cache));                 \
959      else                                                      \
960        break;                                                  \
961    }                                                           \
962  SVN_ERR(unlock_cache(cache, (expr)));                         \
963} while (0)
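
/* Illustrative usage sketch (not part of the implementation): both macros
 * wrap a call to some *_internal worker function while the respective lock
 * is held; the worker names and variables below are hypothetical.
 */
#if 0
  WITH_READ_LOCK(cache, lookup_internal(cache, group_index, key, pool));

  /* May silently skip the call if the non-blocking write lock cannot be
   * taken and no stale entry for KEY needs to be removed. */
  WITH_WRITE_LOCK(cache, insert_internal(cache, group_index, key, pool));
#endif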
964
965/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
966 * been initialized yet. In that case, this group contains no data. Otherwise,
967 * a non-zero value is returned.
968 */
969static APR_INLINE unsigned char
970is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
971{
972  unsigned char flags
973    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
974  unsigned char bit_mask
975    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
976
977  return flags & bit_mask;
978}
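
/* Worked example of the bit arithmetic above, with GROUP_INIT_GRANULARITY
 * of 32: group_index 100 belongs to init block 100 / 32 == 3, i.e. bit
 * 3 % 8 == 3 of byte 100 / 256 == 0 in GROUP_INITIALIZED, giving a bit
 * mask of 0x08.
 */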
979
980/* Initializes the section of the directory in CACHE that contains
981 * the entry group identified by GROUP_INDEX. */
982static void
983initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
984{
985  unsigned char bit_mask;
986  apr_uint32_t i;
987
988  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
989  apr_uint32_t first_index =
990      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
991  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
992  if (last_index > cache->group_count + cache->spare_group_count)
993    last_index = cache->group_count + cache->spare_group_count;
994
995  for (i = first_index; i < last_index; ++i)
996    {
997      group_header_t *header = &cache->directory[i].header;
998      header->used = 0;
999      header->chain_length = 1;
1000      header->next = NO_INDEX;
1001      header->previous = NO_INDEX;
1002    }
1003
1004  /* set the "initialized" bit for these groups */
1005  bit_mask
1006    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
1007  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
1008    |= bit_mask;
1009}
1010
1011/* Return the next available spare group from CACHE and mark it as used.
1012 * May return NULL.
1013 */
1014static entry_group_t *
1015allocate_spare_group(svn_membuffer_t *cache)
1016{
1017  entry_group_t *group = NULL;
1018
1019  /* is there some ready-to-use group? */
1020  if (cache->first_spare_group != NO_INDEX)
1021    {
1022      group = &cache->directory[cache->first_spare_group];
1023      cache->first_spare_group = group->header.next;
1024    }
1025
1026  /* any so far untouched spares available? */
1027  else if (cache->max_spare_used < cache->spare_group_count)
1028    {
1029      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
1030      ++cache->max_spare_used;
1031
1032      if (!is_group_initialized(cache, group_index))
1033        initialize_group(cache, group_index);
1034
1035      group = &cache->directory[group_index];
1036    }
1037
1038  /* spare groups must be empty */
1039  assert(!group || !group->header.used);
1040  return group;
1041}
1042
1043/* Mark previously allocated spare group GROUP in CACHE as "unused".
1044 */
1045static void
1046free_spare_group(svn_membuffer_t *cache,
1047                 entry_group_t *group)
1048{
1049  assert(group->header.used == 0);
1050  assert(group->header.previous != NO_INDEX);
1051  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);
1052
1053  /* unchain */
1054  cache->directory[group->header.previous].header.next = NO_INDEX;
1055  group->header.chain_length = 0;
1056  group->header.previous = NO_INDEX;
1057
1058  /* add to chain of spares */
1059  group->header.next = cache->first_spare_group;
1060  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
1061}
1062
1063/* Follow the group chain from GROUP in CACHE to its end and return the last
1064 * group.  May return GROUP.
1065 */
1066static entry_group_t *
1067last_group_in_chain(svn_membuffer_t *cache,
1068                    entry_group_t *group)
1069{
1070  while (group->header.next != NO_INDEX)
1071    group = &cache->directory[group->header.next];
1072
1073  return group;
1074}
1075
1076/* Return the CHAIN_INDEX-th element in the group chain starting from group
1077 * START_GROUP_INDEX in CACHE.
1078 */
1079static entry_group_t *
1080get_group(svn_membuffer_t *cache,
1081          apr_uint32_t start_group_index,
1082          apr_uint32_t chain_index)
1083{
1084  entry_group_t *group = &cache->directory[start_group_index];
1085  for (; chain_index; --chain_index)
1086    group = &cache->directory[group->header.next];
1087
1088  return group;
1089}
1090
1091/* Resolve a dictionary entry reference, i.e. return the entry
1092 * for the given IDX.
1093 */
1094static APR_INLINE entry_t *
1095get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
1096{
1097  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
1098}
1099
1100/* Return the index of the given ENTRY within CACHE's directory.
1101 */
1102static APR_INLINE apr_uint32_t
1103get_index(svn_membuffer_t *cache, entry_t *entry)
1104{
1105  apr_size_t group_index
1106    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);
1107
1108  return (apr_uint32_t)group_index * GROUP_SIZE
1109       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
1110}
1111
1112/* Return the cache level of ENTRY in CACHE.
1113 */
1114static cache_level_t *
1115get_cache_level(svn_membuffer_t *cache, entry_t *entry)
1116{
1117  return entry->offset < cache->l1.size ? &cache->l1
1118                                        : &cache->l2;
1119}
1120
1121/* Insert ENTRY into the chain of items that belong to LEVEL in CACHE.  IDX
1122 * is ENTRY's item index and is only given for efficiency.  The insertion
1123 * takes place just before LEVEL->NEXT.  *CACHE will not be modified.
1124 */
1125static void
1126chain_entry(svn_membuffer_t *cache,
1127            cache_level_t *level,
1128            entry_t *entry,
1129            apr_uint32_t idx)
1130{
1131  /* insert ENTRY before this item */
1132  entry_t *next = level->next == NO_INDEX
1133                ? NULL
1134                : get_entry(cache, level->next);
1135  assert(idx == get_index(cache, entry));
1136
1137  /* update entry chain
1138   */
1139  entry->next = level->next;
1140  if (level->first == NO_INDEX)
1141    {
1142      /* insert as the first and only entry in the chain
1143       */
1144      entry->previous = NO_INDEX;
1145      level->last = idx;
1146      level->first = idx;
1147    }
1148  else if (next == NULL)
1149    {
1150      /* insert as the last entry in the chain.
1151       * Note that it cannot also be at the beginning of the chain.
1152       */
1153      entry->previous = level->last;
1154      get_entry(cache, level->last)->next = idx;
1155      level->last = idx;
1156    }
1157  else
1158    {
1159      /* insert either at the start of a non-empty list or
1160       * somewhere in the middle
1161       */
1162      entry->previous = next->previous;
1163      next->previous = idx;
1164
1165      if (entry->previous != NO_INDEX)
1166        get_entry(cache, entry->previous)->next = idx;
1167      else
1168        level->first = idx;
1169    }
1170}
1171
1172/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE. IDX
1173 * is ENTRY's item index and is only given for efficiency.  Please note
1174 * that neither *CACHE nor *ENTRY will be modified.
1175 */
1176static void
1177unchain_entry(svn_membuffer_t *cache,
1178              cache_level_t *level,
1179              entry_t *entry,
1180              apr_uint32_t idx)
1181{
1182  assert(idx == get_index(cache, entry));
1183
1184  /* update
1185   */
1186  if (level->next == idx)
1187    level->next = entry->next;
1188
1189  /* unlink it from the chain of used entries
1190   */
1191  if (entry->previous == NO_INDEX)
1192    level->first = entry->next;
1193  else
1194    get_entry(cache, entry->previous)->next = entry->next;
1195
1196  if (entry->next == NO_INDEX)
1197    level->last = entry->previous;
1198  else
1199    get_entry(cache, entry->next)->previous = entry->previous;
1200}
1201
1202/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
1203 * In contrast to insertion, removal is possible for any entry.
1204 */
1205static void
1206drop_entry(svn_membuffer_t *cache, entry_t *entry)
1207{
1208  /* the group that ENTRY belongs to plus a number of useful index values
1209   */
1210  apr_uint32_t idx = get_index(cache, entry);
1211  apr_uint32_t group_index = idx / GROUP_SIZE;
1212  entry_group_t *last_group
1213    = last_group_in_chain(cache, &cache->directory[group_index]);
1214  apr_uint32_t last_in_group
1215    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
1216    + last_group->header.used - 1);
1217
1218  cache_level_t *level = get_cache_level(cache, entry);
1219
1220  /* update global cache usage counters
1221   */
1222  cache->used_entries--;
1223  cache->data_used -= entry->size;
1224
1225  /* extend the insertion window, if the entry happens to border it
1226   */
1227  if (idx == level->next)
1228    level->next = entry->next;
1229  else
1230    if (entry->next == level->next)
1231      {
1232        /* insertion window starts right behind the entry to remove
1233         */
1234        if (entry->previous == NO_INDEX)
1235          {
1236            /* remove the first entry -> insertion may start at pos 0, now */
1237            level->current_data = level->start_offset;
1238          }
1239        else
1240          {
1241            /* insertion may start right behind the previous entry */
1242            entry_t *previous = get_entry(cache, entry->previous);
1243            level->current_data = ALIGN_VALUE(  previous->offset
1244                                              + previous->size);
1245          }
1246      }
1247
1248  /* unlink it from the chain of used entries
1249   */
1250  unchain_entry(cache, level, entry, idx);
1251
1252  /* Move last entry into hole (if the removed one is not the last used).
1253   * We need to do this since all used entries are at the beginning of
1254   * the group's entries array.
1255   */
1256  if (idx != last_in_group)
1257    {
1258      /* copy the last used entry to the removed entry's index
1259       */
1260      *entry = last_group->entries[last_group->header.used-1];
1261
1262      /* this ENTRY may belong to a different cache level than the entry
1263       * we have just removed */
1264      level = get_cache_level(cache, entry);
1265
1266      /* update foreign links to new index
1267       */
1268      if (last_in_group == level->next)
1269        level->next = idx;
1270
1271      if (entry->previous == NO_INDEX)
1272        level->first = idx;
1273      else
1274        get_entry(cache, entry->previous)->next = idx;
1275
1276      if (entry->next == NO_INDEX)
1277        level->last = idx;
1278      else
1279        get_entry(cache, entry->next)->previous = idx;
1280    }
1281
1282  /* Update the number of used entries.
1283   */
1284  last_group->header.used--;
1285
1286  /* Release the last group in the chain if it is a spare group
1287   */
1288  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
1289    free_spare_group(cache, last_group);
1290}
1291
1292/* Insert ENTRY into the chain of used dictionary entries. The entry's
1293 * offset and size members must already have been initialized. Also,
1294 * the offset must match the beginning of the insertion window.
1295 */
1296static void
1297insert_entry(svn_membuffer_t *cache, entry_t *entry)
1298{
1299  /* the group that ENTRY belongs to plus a number of useful index values
1300   */
1301  apr_uint32_t idx = get_index(cache, entry);
1302  apr_uint32_t group_index = idx / GROUP_SIZE;
1303  entry_group_t *group = &cache->directory[group_index];
1304  cache_level_t *level = get_cache_level(cache, entry);
1305
1306  /* The entry must start at the beginning of the insertion window.
1307   * It must also be the first unused entry in the group.
1308   */
1309  assert(entry->offset == level->current_data);
1310  assert(idx == group_index * GROUP_SIZE + group->header.used);
1311  level->current_data = ALIGN_VALUE(entry->offset + entry->size);
1312
1313  /* update usage counters
1314   */
1315  cache->used_entries++;
1316  cache->data_used += entry->size;
1317  entry->hit_count = 0;
1318  group->header.used++;
1319
1320  /* update entry chain
1321   */
1322  chain_entry(cache, level, entry, idx);
1323
1324  /* The current insertion position must never point outside our
1325   * data buffer.
1326   */
1327  assert(level->current_data <= level->start_offset + level->size);
1328}
1329
1330/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
1331 * respective item.
1332 */
1333static apr_uint32_t
1334get_group_index(svn_membuffer_t **cache,
1335                const entry_key_t *key)
1336{
1337  svn_membuffer_t *segment0 = *cache;
1338  apr_uint64_t key0 = key->fingerprint[0];
1339  apr_uint64_t key1 = key->fingerprint[1];
1340
1341  /* select the cache segment to use. They all have the same group_count.
1342   * Since key may not be well-distributed, pre-fold it to a smaller but
1343   * "denser" range.  The modulus is a prime larger than the largest
1344   * counts. */
1345  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
1346                     & (segment0->segment_count - 1)];
1347  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
1348}
1349
1350/* Reduce the hit count of ENTRY and update the accumulated hit info
1351 * in CACHE accordingly.
1352 */
1353static APR_INLINE void
1354let_entry_age(svn_membuffer_t *cache, entry_t *entry)
1355{
1356  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;
1357
1358  if (hits_removed)
1359    {
1360      entry->hit_count -= hits_removed;
1361    }
1362  else
1363    {
1364      entry->priority /= 2;
1365    }
1366}
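
/* Example of the aging above: an entry with HIT_COUNT 5 loses
 * (5 + 1) >> 1 == 3 hits and keeps 2.  Once the hit count has dropped to 0,
 * further aging halves the entry's PRIORITY instead.
 */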
1367
1368/* Return whether the keys in LHS and RHS match.
1369 */
1370static svn_boolean_t
1371entry_keys_match(const entry_key_t *lhs,
1372                 const entry_key_t *rhs)
1373{
1374  return (lhs->fingerprint[0] == rhs->fingerprint[0])
1375      && (lhs->fingerprint[1] == rhs->fingerprint[1])
1376      && (lhs->prefix_idx == rhs->prefix_idx)
1377      && (lhs->key_len == rhs->key_len);
1378}
1379
1380/* Given the GROUP_INDEX that shall contain an entry with the hash key
1381 * TO_FIND, find that entry in the specified group.
1382 *
1383 * If FIND_EMPTY is not set, this function will return the one used entry
1384 * that actually matches the hash or NULL, if no such entry exists.
1385 *
1386 * If FIND_EMPTY has been set, this function will return an entry fit to
1387 * be filled with new content: either the one used entry that matches the
1388 * hash (its old content gets dropped), an unused entry, or a forcibly
1389 * evicted entry (if all group entries are currently in use).  The entry's
1390 * hash value will be initialized with TO_FIND.
1391 *
1392 * Note: This function requires the caller to appropriately lock the CACHE.
1393 * For FIND_EMPTY==FALSE, a read lock is required, for FIND_EMPTY==TRUE,
1394 * the write lock must have been acquired.
1395 */
1396static entry_t *
1397find_entry(svn_membuffer_t *cache,
1398           apr_uint32_t group_index,
1399           const full_key_t *to_find,
1400           svn_boolean_t find_empty)
1401{
1402  entry_group_t *group;
1403  entry_t *entry = NULL;
1404  apr_size_t i;
1405
1406  /* get the group that *must* contain the entry
1407   */
1408  group = &cache->directory[group_index];
1409
1410  /* If the entry group has not been initialized, yet, there is no data.
1411   */
1412  if (! is_group_initialized(cache, group_index))
1413    {
1414      if (find_empty)
1415        {
1416          initialize_group(cache, group_index);
1417          entry = &group->entries[0];
1418
1419          /* initialize entry for the new key */
1420          entry->key = to_find->entry_key;
1421        }
1422
1423      return entry;
1424    }
1425
1426  /* try to find the matching entry
1427   */
1428  while (1)
1429    {
1430      for (i = 0; i < group->header.used; ++i)
1431        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
1432          {
1433            /* This is the only entry that _may_ contain the correct data. */
1434            entry = &group->entries[i];
1435
1436            /* If we want to preserve it, check that it is actually a match. */
1437            if (!find_empty)
1438              {
1439                /* If the full key is fully defined by the prefix index &
1440                 * fingerprint, we are done. */
1441                if (!entry->key.key_len)
1442                  return entry;
1443
1444                /* Compare the full key. */
1445                if (memcmp(to_find->full_key.data,
1446                           cache->data + entry->offset,
1447                           entry->key.key_len) == 0)
1448                  return entry;
1449
1450                /* Key conflict. The entry to find cannot be anywhere else.
1451                 * Therefore, it is not cached. */
1452                return NULL;
1453              }
1454
1455            /* need to empty that entry */
1456            drop_entry(cache, entry);
1457            if (group->header.used == GROUP_SIZE)
1458              group = last_group_in_chain(cache, group);
1459            else if (group->header.chain_length == 0)
1460              group = last_group_in_chain(cache,
1461                                          &cache->directory[group_index]);
1462
1463            /* No entry found (actually, none left to find). */
1464            entry = NULL;
1465            break;
1466          }
1467
1468      /* end of chain? */
1469      if (group->header.next == NO_INDEX)
1470        break;
1471
1472      /* only full groups may chain */
1473      assert(group->header.used == GROUP_SIZE);
1474      group = &cache->directory[group->header.next];
1475    }
1476
1477  /* None found. Are we looking for a free entry?
1478   */
1479  if (find_empty)
1480    {
1481      /* There is no empty entry in the chain, try chaining a spare group.
1482       */
1483      if (   group->header.used == GROUP_SIZE
1484          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
1485        {
1486          entry_group_t *new_group = allocate_spare_group(cache);
1487          if (new_group)
1488            {
1489              /* chain groups
1490               */
1491              new_group->header.chain_length = group->header.chain_length + 1;
1492              new_group->header.previous = (apr_uint32_t) (group -
1493                                                           cache->directory);
1494              new_group->header.next = NO_INDEX;
1495              group->header.next = (apr_uint32_t) (new_group -
1496                                                   cache->directory);
1497              group = new_group;
1498            }
1499        }
1500
1501      /* if GROUP is still filled, we need to remove a random entry */
1502      if (group->header.used == GROUP_SIZE)
1503        {
1504          /* Give every entry the same chance of being removed.
1505           * Otherwise, we would always free the first entry, fill it and
1506           * remove it again on the next occasion without considering
1507           * the other entries in this group.
1508           *
1509           * We hit only one random group instead of processing all
1510           * groups in the chain.
1511           */
1512          cache_level_t *entry_level;
1513          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
1514          entry_group_t *to_shrink
1515            = get_group(cache, group_index, to_remove / GROUP_SIZE);
1516
1517          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
1518          entry_level = get_cache_level(cache, entry);
1519          for (i = 0; i < GROUP_SIZE; ++i)
1520            {
1521              /* keep L1 entries whenever possible */
1522
1523              cache_level_t *level
1524                = get_cache_level(cache, &to_shrink->entries[i]);
1525              if (   (level != entry_level && entry_level == &cache->l1)
1526                  || (entry->hit_count > to_shrink->entries[i].hit_count))
1527                {
1528                  entry_level = level;
1529                  entry = &to_shrink->entries[i];
1530                }
1531            }
1532
1533          /* for the entries that have not been removed,
1534           * reduce their hit counts to put them at a relative
1535           * disadvantage the next time.
1536           */
1537          for (i = 0; i < GROUP_SIZE; ++i)
1538            if (entry != &to_shrink->entries[i])
1539              let_entry_age(cache, &to_shrink->entries[i]);
1540
1541          drop_entry(cache, entry);
1542        }
1543
1544      /* initialize entry for the new key
1545       */
1546      entry = &group->entries[group->header.used];
1547      entry->key = to_find->entry_key;
1548    }
1549
1550  return entry;
1551}
1552
1553/* Move a surviving ENTRY from just behind the insertion window to
1554 * its beginning and move the insertion window up accordingly.
1555 */
1556static void
1557move_entry(svn_membuffer_t *cache, entry_t *entry)
1558{
1559  apr_size_t size = ALIGN_VALUE(entry->size);
1560  cache_level_t *level = get_cache_level(cache, entry);
1561
1562  /* This entry survived this cleansing run. Reset half of its
1563   * hit count so that its removal gets more likely in the next
1564   * run unless someone read / hit this entry in the meantime.
1565   */
1566  let_entry_age(cache, entry);
1567
1568  /* Move the entry to the start of the empty / insertion section
1569   * (if it isn't there already). Size-aligned moves are legal
1570   * since all offsets and block sizes share this same alignment.
1571   * Size-aligned moves tend to be faster than non-aligned ones
1572   * because no "odd" bytes at the end need special treatment.
1573   */
1574  if (entry->offset != level->current_data)
1575    {
1576      memmove(cache->data + level->current_data,
1577              cache->data + entry->offset,
1578              size);
1579      entry->offset = level->current_data;
1580    }
1581
1582  /* The insertion position is now directly behind this entry.
1583   */
1584  level->current_data = entry->offset + size;
1585  level->next = entry->next;
1586
1587  /* The current insertion position must never point outside our
1588   * data buffer.
1589   */
1590  assert(level->current_data <= level->start_offset + level->size);
1591}
1592
1593/* Move ENTRY in CACHE from L1 to L2.
1594 */
1595static void
1596promote_entry(svn_membuffer_t *cache, entry_t *entry)
1597{
1598  apr_uint32_t idx = get_index(cache, entry);
1599  apr_size_t size = ALIGN_VALUE(entry->size);
1600  assert(get_cache_level(cache, entry) == &cache->l1);
1601  assert(idx == cache->l1.next);
1602
1603  /* copy item from the current location in L1 to the start of L2's
1604   * insertion window */
1605  memmove(cache->data + cache->l2.current_data,
1606          cache->data + entry->offset,
1607          size);
1608  entry->offset = cache->l2.current_data;
1609
1610  /* The insertion position is now directly behind this entry.
1611   */
1612  cache->l2.current_data += size;
1613
1614  /* remove ENTRY from chain of L1 entries and put it into L2
1615   */
1616  unchain_entry(cache, &cache->l1, entry, idx);
1617  chain_entry(cache, &cache->l2, entry, idx);
1618}
1619
1620/* This function implements the cache insertion / eviction strategy for L2.
1621 *
1622 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
1623 * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
1624 * data buffer size allocated to CACHE->L2.  IDX is the item index of
1625 * TO_FIT_IN and is given for performance reasons.
1626 *
1627 * Return TRUE if enough room could be found or made.  A FALSE result
1628 * indicates that the respective item shall not be added.
1629 */
1630static svn_boolean_t
1631ensure_data_insertable_l2(svn_membuffer_t *cache,
1632                          entry_t *to_fit_in)
1633{
1634  entry_t *entry;
1635
1636  /* accumulated size of the entries that have been removed to make
1637   * room for the new one.
1638   */
1639  apr_uint64_t moved_size = 0;
1640
1641  /* count the number of entries that got moved.  A single large entry
1642   * being moved is not enough to reject an insertion.
1643   */
1644  apr_size_t moved_count = 0;
1645
1646  /* accumulated "worth" of items dropped so far */
1647  apr_uint64_t drop_hits = 0;
1648
1649  /* estimated "worth" of the new entry */
1650  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
1651                               * (apr_uint64_t)to_fit_in->priority;
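  /* Example of the budget above (hypothetical numbers): an incoming entry
   * with hit_count == 3 and priority P gets a budget of (3 + 1) * P
   * weighted hits.  Once the entries dropped in the loop below (excluding
   * low-priority ones) account for more than that, the insertion attempt
   * is abandoned.
   */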
1652
1653  /* This loop will eventually terminate because every cache entry
1654   * would get dropped eventually:
1655   *
1656   * - the incoming entry is small enough to fit into L2
1657   * - every iteration either frees parts of L2 or counts the moved size
1658   * - eventually, we either moved too many items with too much total size
1659   *   to accept the new entry, or made enough room in L2 for the new entry
1660   *
1661   * Low-prio items get rejected even sooner.
1662   */
1663  while (1)
1664    {
1665      /* first offset behind the insertion window
1666       */
1667      apr_uint64_t end = cache->l2.next == NO_INDEX
1668                       ? cache->l2.start_offset + cache->l2.size
1669                       : get_entry(cache, cache->l2.next)->offset;
1670
1671      /* leave function as soon as the insertion window is large enough
1672       */
1673      if (end - cache->l2.current_data >= to_fit_in->size)
1674        return TRUE;
1675
1676      /* Don't be too eager to cache data.  If a lot of data has been moved
1677       * around, the current item probably has a relatively low priority.
1678       * We must also limit the effort spent here (even in case of faulty
1679       * heuristics).  Therefore, give up after some time.
1680       */
1681      if (moved_size / 4 > to_fit_in->size && moved_count > 7)
1682        return FALSE;
1683
1684      /* if the net worth (in weighted hits) of items removed is already
1685       * larger than what we want to insert, reject TO_FIT_IN because it
1686       * still does not fit in. */
1687      if (drop_hits > drop_hits_limit)
1688        return FALSE;
1689
1690      /* try to enlarge the insertion window
1691       */
1692      if (cache->l2.next == NO_INDEX)
1693        {
1694          /* We reached the end of the data buffer; restart at the beginning.
1695           * Due to the randomized nature of our LFU implementation, very
1696           * large data items may require multiple passes. Therefore, SIZE
1697           * should be restricted to significantly less than data_size.
1698           */
1699          cache->l2.current_data = cache->l2.start_offset;
1700          cache->l2.next = cache->l2.first;
1701        }
1702      else
1703        {
1704          svn_boolean_t keep;
1705          entry = get_entry(cache, cache->l2.next);
1706
1707          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1708            {
1709              /* Low-prio items can be accepted only if the current
1710               * entry is of even lower prio and has fewer hits.
1711               */
1712              if (   entry->priority > to_fit_in->priority
1713                  || entry->hit_count > to_fit_in->hit_count)
1714                return FALSE;
1715            }
1716
1717          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
1718            {
1719              /* Be quick to remove low-prio entries - even if the incoming
1720               * one is low-prio as well.  This makes room for more important
1721               * data and replaces existing data with newly read information.
1722               */
1723              keep = FALSE;
1724            }
1725          else
1726            {
1727              /* If the existing data is the same prio as the incoming data,
1728               * drop the existing entry if it had seen fewer (probably 0)
1729               * hits than the entry coming in from L1.  In case of different
1730               * priorities, keep the current entry if it has higher prio.
1731               * The new entry may still find room by ousting other entries.
1732               */
1733              keep = to_fit_in->priority == entry->priority
1734                   ? entry->hit_count >= to_fit_in->hit_count
1735                   : entry->priority > to_fit_in->priority;
1736            }
1737
1738          /* keepers or destroyers? */
1739          if (keep)
1740            {
1741              /* Moving entries around is not for free -> track costs. */
1742              moved_size += entry->size;
1743              moved_count++;
1744
1745              move_entry(cache, entry);
1746            }
1747          else
1748            {
1749              /* Drop the entry from the end of the insertion window.
1750               * Count the "hit importance" such that we are not sacrificing
1751               * too much of the high-hit contents.  However, don't count
1752               * low-priority hits because higher prio entries will often
1753               * provide the same data but in a further stage of processing.
1754               */
1755              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
1756                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;
1757
1758              drop_entry(cache, entry);
1759            }
1760        }
1761    }
1762
1763  /* This will never be reached. But if it was, "can't insert" was the
1764   * right answer. */
1765}
1766
1767/* This function implements the cache insertion / eviction strategy for L1.
1768 *
1769 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
1770 * entries to L2 until it is at least SIZE bytes long.
1771 *
1772 * Return TRUE if enough room could be found or made.  A FALSE result
1773 * indicates that the respective item shall not be added because it is
1774 * too large.
1775 */
1776static svn_boolean_t
1777ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
1778{
1779  /* Guarantees that the while loop will terminate. */
1780  if (size > cache->l1.size)
1781    return FALSE;
1782
1783  /* This loop will eventually terminate because every cache entry
1784   * would get dropped eventually.
1785   */
1786  while (1)
1787    {
1788      /* first offset behind the insertion window
1789       */
1790      apr_uint32_t entry_index = cache->l1.next;
1791      entry_t *entry = get_entry(cache, entry_index);
1792      apr_uint64_t end = cache->l1.next == NO_INDEX
1793                       ? cache->l1.start_offset + cache->l1.size
1794                       : entry->offset;
1795
1796      /* leave function as soon as the insertion window is large enough
1797       */
1798      if (end - cache->l1.current_data >= size)
1799        return TRUE;
1800
1801      /* Enlarge the insertion window
1802       */
1803      if (cache->l1.next == NO_INDEX)
1804        {
1805          /* We reached the end of the data buffer; restart at the beginning.
1806           * Due to the randomized nature of our LFU implementation, very
1807           * large data items may require multiple passes. Therefore, SIZE
1808           * should be restricted to significantly less than data_size.
1809           */
1810          cache->l1.current_data = cache->l1.start_offset;
1811          cache->l1.next = cache->l1.first;
1812        }
1813      else
1814        {
1815          /* Remove the entry from the end of insertion window and promote
1816           * it to L2, if it is important enough.
1817           */
1818          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);
1819
1820          /* We might have touched the group that contains ENTRY. Recheck. */
1821          if (entry_index == cache->l1.next)
1822            {
1823              if (keep)
1824                promote_entry(cache, entry);
1825              else
1826                drop_entry(cache, entry);
1827            }
1828        }
1829    }
1830
1831  /* This will never be reached. But if it was, "can't insert" was the
1832   * right answer. */
1833}
1834
1835svn_error_t *
1836svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
1837                                  apr_size_t total_size,
1838                                  apr_size_t directory_size,
1839                                  apr_size_t segment_count,
1840                                  svn_boolean_t thread_safe,
1841                                  svn_boolean_t allow_blocking_writes,
1842                                  apr_pool_t *pool)
1843{
1844  svn_membuffer_t *c;
1845  prefix_pool_t *prefix_pool;
1846
1847  apr_uint32_t seg;
1848  apr_uint32_t group_count;
1849  apr_uint32_t main_group_count;
1850  apr_uint32_t spare_group_count;
1851  apr_uint32_t group_init_size;
1852  apr_uint64_t data_size;
1853  apr_uint64_t max_entry_size;
1854
1855  /* Allocate 1% of the cache capacity to the prefix string pool.
1856   */
1857  SVN_ERR(prefix_pool_create(&prefix_pool, total_size / 100, thread_safe,
1858                             pool));
1859  total_size -= total_size / 100;
1860
1861  /* Limit the total size (only relevant if we can address > 4GB)
1862   */
1863#if APR_SIZEOF_VOIDP > 4
1864  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
1865    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
1866#endif
1867
1868  /* Limit the segment count
1869   */
1870  if (segment_count > MAX_SEGMENT_COUNT)
1871    segment_count = MAX_SEGMENT_COUNT;
1872  if (segment_count * MIN_SEGMENT_SIZE > total_size)
1873    segment_count = total_size / MIN_SEGMENT_SIZE;
1874
1875  /* The segment count must be a power of two. Round it down as necessary.
1876   */
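  /* Each loop iteration below clears the lowest set bit, so the value
   * converges to its highest set bit.  For example, 12 (1100b) becomes
   * 8 (1000b) after one iteration and 7 (0111b) becomes 6, then 4.
   */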
1877  while ((segment_count & (segment_count-1)) != 0)
1878    segment_count &= segment_count-1;
1879
1880  /* if the caller hasn't provided a reasonable segment count or the above
1881   * limitations set it to 0, derive one from the absolute cache size
1882   */
1883  if (segment_count < 1)
1884    {
1885      /* Determine a reasonable number of cache segments. Segmentation is
1886       * only useful for multi-threaded / multi-core servers as it reduces
1887       * lock contention on these systems.
1888       *
1889       * But on these systems, we can assume that ample memory has been
1890       * allocated to this cache. Smaller caches should not be segmented
1891       * as this severely limits the maximum size of cachable items.
1892       *
1893       * Segments should not be smaller than 32MB and max. cachable item
1894       * size should grow as fast as segmentation.
1895       */
1896
1897      apr_uint32_t segment_count_shift = 0;
1898      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
1899             < total_size)
1900        ++segment_count_shift;
1901
1902      segment_count = (apr_size_t)1 << segment_count_shift;
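      /* Illustration (assuming DEFAULT_MIN_SEGMENT_SIZE were 32MB):
       * for a 1GB cache, the loop above stops at a shift of 2 because
       * 64MB << 4 is no longer smaller than 1GB.  That yields 4 segments
       * of 256MB each. */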
1903    }
1904
1905  /* If we have an extremely large cache (>512 GB), the default segment
1906   * size may exceed the amount allocatable as one chunk. In that case,
1907   * increase segmentation until we are under the threshold.
1908   */
1909  while (   total_size / segment_count > MAX_SEGMENT_SIZE
1910         && segment_count < MAX_SEGMENT_COUNT)
1911    segment_count *= 2;
1912
1913  /* allocate cache as an array of segments / cache objects */
1914  c = apr_palloc(pool, segment_count * sizeof(*c));
1915
1916  /* Split total cache size into segments of equal size
1917   */
1918  total_size /= segment_count;
1919  directory_size /= segment_count;
1920
1921  /* prevent pathological conditions: ensure a certain minimum cache size
1922   */
1923  if (total_size < 2 * sizeof(entry_group_t))
1924    total_size = 2 * sizeof(entry_group_t);
1925
1926  /* adapt the dictionary size accordingly, if necessary:
1927   * It must hold at least one group and must not exceed the cache size.
1928   */
1929  if (directory_size > total_size - sizeof(entry_group_t))
1930    directory_size = total_size - sizeof(entry_group_t);
1931  if (directory_size < 2 * sizeof(entry_group_t))
1932    directory_size = 2 * sizeof(entry_group_t);
1933
1934  /* limit the data size to what we can address.
1935   * Note that this cannot overflow since all values are of size_t.
1936   * Also, make it a multiple of the item placement granularity to
1937   * prevent subtle overflows.
1938   */
1939  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
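  /* The "+ 1 ... - ITEM_ALIGNMENT" combination above rounds *down* to the
   * alignment.  Illustration (assuming ITEM_ALIGNMENT were 16): for an
   * available size of 1000, ALIGN_VALUE(1001) == 1008 and subtracting 16
   * gives 992, the largest multiple of 16 not exceeding 1000. */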
1940
1941  /* For cache sizes > 16TB, individual cache segments will be larger
1942   * than 32GB allowing for >4GB entries.  But caching chunks larger
1943   * than 4GB is simply not supported.
1944   */
1945  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
1946                 ? MAX_ITEM_SIZE
1947                 : data_size / 8;
1948
1949  /* to keep the entries small, we use 32 bit indexes only
1950   * -> we need to ensure that no more than 4G entries exist.
1951   * Note that this limit could only be exceeded in a very
1952   * Note, that this limit could only be exceeded in a very
1953   * theoretical setup with about 1EB of cache.
1954   */
1955  group_count = directory_size / sizeof(entry_group_t)
1956                    >= (APR_UINT32_MAX / GROUP_SIZE)
1957              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
1958              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));
1959
1960  /* set some of the index directory aside as overflow (spare) buffers */
1961  spare_group_count = MAX(group_count / 4, 1);
1962  main_group_count = group_count - spare_group_count;
1963  assert(spare_group_count > 0 && main_group_count > 0);
1964
1965  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
1966  for (seg = 0; seg < segment_count; ++seg)
1967    {
1968      /* allocate buffers and initialize cache members
1969       */
1970      c[seg].segment_count = (apr_uint32_t)segment_count;
1971      c[seg].prefix_pool = prefix_pool;
1972
1973      c[seg].group_count = main_group_count;
1974      c[seg].spare_group_count = spare_group_count;
1975      c[seg].first_spare_group = NO_INDEX;
1976      c[seg].max_spare_used = 0;
1977
1978      /* Allocate but don't clear / zero the directory because it would add
1979         significantly to the server start-up time if the caches are large.
1980         Group initialization will take care of that instead. */
1981      c[seg].directory = apr_palloc(pool,
1982                                    group_count * sizeof(entry_group_t));
1983
1984      /* Allocate and initialize directory entries as "not initialized",
1985         hence "unused" */
1986      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);
1987
1988      /* Allocate 1/4th of the data buffer to L1
1989       */
1990      c[seg].l1.first = NO_INDEX;
1991      c[seg].l1.last = NO_INDEX;
1992      c[seg].l1.next = NO_INDEX;
1993      c[seg].l1.start_offset = 0;
1994      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
1995      c[seg].l1.current_data = 0;
1996
1997      /* The remaining 3/4th will be used as L2
1998       */
1999      c[seg].l2.first = NO_INDEX;
2000      c[seg].l2.last = NO_INDEX;
2001      c[seg].l2.next = NO_INDEX;
2002      c[seg].l2.start_offset = c[seg].l1.size;
2003      c[seg].l2.size = ALIGN_VALUE(data_size) - c[seg].l1.size;
2004      c[seg].l2.current_data = c[seg].l2.start_offset;
2005
2006      /* This cast is safe because DATA_SIZE <= MAX_SEGMENT_SIZE. */
2007      c[seg].data = apr_palloc(pool, (apr_size_t)ALIGN_VALUE(data_size));
2008      c[seg].data_used = 0;
2009      c[seg].max_entry_size = max_entry_size;
2010
2011      c[seg].used_entries = 0;
2012      c[seg].total_reads = 0;
2013      c[seg].total_writes = 0;
2014      c[seg].total_hits = 0;
2015
2016      /* were allocations successful?
2017       * If not, there is no point in continuing with a partial cache.
2018       */
2019      if (c[seg].data == NULL || c[seg].directory == NULL)
2020        {
2021          /* We are OOM. There is no need to proceed with "half a cache".
2022           */
2023          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
2024        }
2025
2026#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
2027      /* A lock for intra-process synchronization to the cache, or NULL if
2028       * the cache's creator doesn't feel the cache needs to be
2029       * thread-safe.
2030       */
2031      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
2032#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
2033      /* Same for read-write lock. */
2034      c[seg].lock = NULL;
2035      if (thread_safe)
2036        {
2037          apr_status_t status =
2038              apr_thread_rwlock_create(&(c[seg].lock), pool);
2039          if (status)
2040            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
2041        }
2042
2043      /* Select the behavior of write operations.
2044       */
2045      c[seg].allow_blocking_writes = allow_blocking_writes;
2046#endif
2047      /* No writers at the moment. */
2048      c[seg].write_lock_count = 0;
2049    }
2050
2051  /* done here
2052   */
2053  *cache = c;
2054  return SVN_NO_ERROR;
2055}
2056
2057svn_error_t *
2058svn_cache__membuffer_clear(svn_membuffer_t *cache)
2059{
2060  apr_size_t seg;
2061  apr_size_t segment_count = cache->segment_count;
2062
2063  /* Length of the group_initialized array in bytes.
2064     See also svn_cache__membuffer_cache_create(). */
2065  apr_size_t group_init_size
2066    = 1 + (cache->group_count + cache->spare_group_count)
2067            / (8 * GROUP_INIT_GRANULARITY);
2068
2069  /* Clear segment by segment.  This implies that other threads may read
2070     and write to other segments after we cleared them and before the
2071     last segment is done.
2072
2073     However, that is no different from a write request coming through
2074     right after we cleared all segments because dependencies between
2075     cache entries (recursive lookup / access locks) are not allowed.
2076   */
2077  for (seg = 0; seg < segment_count; ++seg)
2078    {
2079      /* Unconditionally acquire the write lock. */
2080      SVN_ERR(force_write_lock_cache(&cache[seg]));
2081
2082      /* Mark all groups as "not initialized", which implies "empty". */
2083      cache[seg].first_spare_group = NO_INDEX;
2084      cache[seg].max_spare_used = 0;
2085
2086      memset(cache[seg].group_initialized, 0, group_init_size);
2087
2088      /* Unlink L1 contents. */
2089      cache[seg].l1.first = NO_INDEX;
2090      cache[seg].l1.last = NO_INDEX;
2091      cache[seg].l1.next = NO_INDEX;
2092      cache[seg].l1.current_data = cache[seg].l1.start_offset;
2093
2094      /* Unlink L2 contents. */
2095      cache[seg].l2.first = NO_INDEX;
2096      cache[seg].l2.last = NO_INDEX;
2097      cache[seg].l2.next = NO_INDEX;
2098      cache[seg].l2.current_data = cache[seg].l2.start_offset;
2099
2100      /* Reset content counters. */
2101      cache[seg].data_used = 0;
2102      cache[seg].used_entries = 0;
2103
2104      /* Segment may be used again. */
2105      SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
2106    }
2107
2108  /* done here */
2109  return SVN_NO_ERROR;
2110}
2111
2112/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2113 * by the hash value TO_FIND and set *FOUND accordingly.
2114 *
2115 * Note: This function requires the caller to serialize access.
2116 * Don't call it directly, call entry_exists instead.
2117 */
2118static svn_error_t *
2119entry_exists_internal(svn_membuffer_t *cache,
2120                      apr_uint32_t group_index,
2121                      const full_key_t *to_find,
2122                      svn_boolean_t *found)
2123{
2124  *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
2125  return SVN_NO_ERROR;
2126}
2127
2128/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2129 * by the hash value TO_FIND and set *FOUND accordingly.
2130 */
2131static svn_error_t *
2132entry_exists(svn_membuffer_t *cache,
2133             apr_uint32_t group_index,
2134             const full_key_t *to_find,
2135             svn_boolean_t *found)
2136{
2137  WITH_READ_LOCK(cache,
2138                 entry_exists_internal(cache,
2139                                       group_index,
2140                                       to_find,
2141                                       found));
2142
2143  return SVN_NO_ERROR;
2144}
2145
2146/* Given the SIZE and PRIORITY of a new item, return the cache level
2147   (L1 or L2) in fragment CACHE that this item shall be inserted into.
2148   If we can neither find nor make enough room for the item, return NULL.
2149 */
2150static cache_level_t *
2151select_level(svn_membuffer_t *cache,
2152             apr_size_t size,
2153             apr_uint32_t priority)
2154{
2155  if (cache->max_entry_size >= size)
2156    {
2157      /* Small items go into L1. */
2158      return ensure_data_insertable_l1(cache, size)
2159           ? &cache->l1
2160           : NULL;
2161    }
2162  else if (   cache->l2.size >= size
2163           && MAX_ITEM_SIZE >= size
2164           && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
2165    {
2166      /* Large but important items go into L2. */
2167      entry_t dummy_entry = { { { 0 } } };
2168      dummy_entry.priority = priority;
2169      dummy_entry.size = size;
2170
2171      return ensure_data_insertable_l2(cache, &dummy_entry)
2172           ? &cache->l2
2173           : NULL;
2174    }
2175
2176  /* Don't cache large, unimportant items. */
2177  return NULL;
2178}
2179
2180/* Try to insert the serialized item given in BUFFER with ITEM_SIZE
2181 * into the group GROUP_INDEX of CACHE and uniquely identify it by
2182 * hash value TO_FIND.
2183 *
2184 * However, there is no guarantee that it will actually be put into
2185 * the cache. If there is already some data associated with TO_FIND,
2186 * it will be removed from the cache even if the new data cannot
2187 * be inserted.
2188 *
2189 * Note: This function requires the caller to serialize access.
2190 * Don't call it directly, call membuffer_cache_set instead.
2191 */
2192static svn_error_t *
2193membuffer_cache_set_internal(svn_membuffer_t *cache,
2194                             const full_key_t *to_find,
2195                             apr_uint32_t group_index,
2196                             char *buffer,
2197                             apr_size_t item_size,
2198                             apr_uint32_t priority,
2199                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
2200                             apr_pool_t *scratch_pool)
2201{
2202  cache_level_t *level;
2203  apr_size_t size;
2204
2205  /* first, look for a previous entry for the given key */
2206  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2207
2208  /* If this one fails, you are using multiple threads but created the
2209   * membuffer in single-threaded mode. */
2210  assert(0 == svn_atomic_inc(&cache->write_lock_count));
2211
2212  /* Quick check to make sure the arithmetic will work further down the road. */
2213  size = item_size + to_find->entry_key.key_len;
2214  if (size < item_size)
2215    {
2216      /* Arithmetic overflow, so the combination of serialized ITEM and KEY
2217       * cannot fit into the cache.  Setting BUFFER to NULL will cause the
2218       * removal of any existing entry without writing new data. */
2219      buffer = NULL;
2220    }
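  /* Illustration of the overflow test above (hypothetical 32-bit
   * apr_size_t): ITEM_SIZE == 0xFFFFFFF0 plus a key length of 0x20 wraps
   * around to 0x10, which is smaller than ITEM_SIZE, so the check fires
   * and BUFFER is discarded instead of writing bogus data. */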
2221
2222  /* if there is an old version of that entry and the new data fits into
2223   * the old spot, just re-use that space. */
2224  if (entry && buffer && ALIGN_VALUE(entry->size) >= size)
2225    {
2226      /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2227       * lest we run into trouble with 32 bit underflow *not* treated as a
2228       * negative value.
2229       */
2230      cache->data_used += (apr_uint64_t)size - entry->size;
2231      entry->size = size;
2232      entry->priority = priority;
2233
2234#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2235
2236      /* Remember original content, type and key (hashes)
2237       */
2238      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2239      memcpy(&entry->tag, tag, sizeof(*tag));
2240
2241#endif
2242
2243      if (entry->key.key_len)
2244        memcpy(cache->data + entry->offset, to_find->full_key.data,
2245               entry->key.key_len);
2246      if (item_size)
2247        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2248               item_size);
2249
2250      cache->total_writes++;
2251
2252      /* Putting the decrement into an assert() to make it disappear
2253       * in production code. */
2254      assert(0 == svn_atomic_dec(&cache->write_lock_count));
2255      return SVN_NO_ERROR;
2256    }
2257
2258  /* if necessary, enlarge the insertion window.
2259   */
2260  level = buffer ? select_level(cache, size, priority) : NULL;
2261  if (level)
2262    {
2263      /* Remove old data for this key, if it exists.
2264       * Get an unused entry for the key and initialize it with
2265       * the serialized item's (future) position within data buffer.
2266       */
2267      entry = find_entry(cache, group_index, to_find, TRUE);
2268      entry->size = size;
2269      entry->offset = level->current_data;
2270      entry->priority = priority;
2271
2272#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2273
2274      /* Remember original content, type and key (hashes)
2275       */
2276      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2277      memcpy(&entry->tag, tag, sizeof(*tag));
2278
2279#endif
2280
2281      /* Link the entry properly.
2282       */
2283      insert_entry(cache, entry);
2284
2285      /* Copy the serialized item data into the cache.
2286       */
2287      if (entry->key.key_len)
2288        memcpy(cache->data + entry->offset, to_find->full_key.data,
2289               entry->key.key_len);
2290      if (item_size)
2291        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2292               item_size);
2293
2294      cache->total_writes++;
2295    }
2296  else
2297    {
2298      /* if there is already an entry for this key, drop it.
2299       * Since ensure_data_insertable may have removed entries from
2300       * ENTRY's group, re-do the lookup.
2301       */
2302      entry = find_entry(cache, group_index, to_find, FALSE);
2303      if (entry)
2304        drop_entry(cache, entry);
2305    }
2306
2307  /* Putting the decrement into an assert() to make it disappear
2308   * in production code. */
2309  assert(0 == svn_atomic_dec(&cache->write_lock_count));
2310  return SVN_NO_ERROR;
2311}
2312
2313/* Try to insert the ITEM and use the KEY to uniquely identify it.
2314 * However, there is no guarantee that it will actually be put into
2315 * the cache. If there is already some data associated with the KEY,
2316 * it will be removed from the cache even if the new data cannot
2317 * be inserted.
2318 *
2319 * The SERIALIZER is called to transform the ITEM into a single,
2320 * flat data buffer. Temporary allocations may be done in POOL.
2321 */
2322static svn_error_t *
2323membuffer_cache_set(svn_membuffer_t *cache,
2324                    const full_key_t *key,
2325                    void *item,
2326                    svn_cache__serialize_func_t serializer,
2327                    apr_uint32_t priority,
2328                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2329                    apr_pool_t *scratch_pool)
2330{
2331  apr_uint32_t group_index;
2332  void *buffer = NULL;
2333  apr_size_t size = 0;
2334
2335  /* find the entry group that will hold the key.
2336   */
2337  group_index = get_group_index(&cache, &key->entry_key);
2338
2339  /* Serialize the data.
2340   */
2341  if (item)
2342    SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2343
2344  /* The actual cache data access needs to be sync'ed
2345   */
2346  WITH_WRITE_LOCK(cache,
2347                  membuffer_cache_set_internal(cache,
2348                                               key,
2349                                               group_index,
2350                                               buffer,
2351                                               size,
2352                                               priority,
2353                                               DEBUG_CACHE_MEMBUFFER_TAG
2354                                               scratch_pool));
2355  return SVN_NO_ERROR;
2356}
2357
2358/* Count a hit in ENTRY within CACHE.
2359 */
2360static void
2361increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2362{
2363  /* To minimize the memory footprint of the cache index, we limit local
2364   * hit counters to 32 bits.  These may overflow but we don't really
2365   * care because at worst, ENTRY will be dropped from cache once every
2366   * few billion hits. */
2367  svn_atomic_inc(&entry->hit_count);
2368
2369  /* That one is for stats only. */
2370  cache->total_hits++;
2371}
2372
2373/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2374 * by the hash value TO_FIND. If no item has been stored for KEY,
2375 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2376 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2377 * be done in POOL.
2378 *
2379 * Note: This function requires the caller to serialize access.
2380 * Don't call it directly, call membuffer_cache_get instead.
2381 */
2382static svn_error_t *
2383membuffer_cache_get_internal(svn_membuffer_t *cache,
2384                             apr_uint32_t group_index,
2385                             const full_key_t *to_find,
2386                             char **buffer,
2387                             apr_size_t *item_size,
2388                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
2389                             apr_pool_t *result_pool)
2390{
2391  entry_t *entry;
2392  apr_size_t size;
2393
2394  /* The actual cache data access needs to be sync'ed
2395   */
2396  entry = find_entry(cache, group_index, to_find, FALSE);
2397  cache->total_reads++;
2398  if (entry == NULL)
2399    {
2400      /* no such entry found.
2401       */
2402      *buffer = NULL;
2403      *item_size = 0;
2404
2405      return SVN_NO_ERROR;
2406    }
2407
2408  size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2409  *buffer = apr_palloc(result_pool, size);
2410  memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
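  /* Note on the copy above: a cached block starts with ENTRY->KEY.KEY_LEN
   * bytes of full key, followed by the serialized item.  Because SIZE is
   * derived from the aligned entry size, up to ITEM_ALIGNMENT - 1 trailing
   * padding bytes get copied as well; *ITEM_SIZE (set below) reports the
   * exact item length, so deserializers never look at that padding. */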
2411
2412#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2413
2414  /* Check for overlapping entries.
2415   */
2416  SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2417                 entry->offset + size
2418                    <= get_entry(cache, entry->next)->offset);
2419
2420  /* Compare original content, type and key (hashes)
2421   */
2422  SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2423                             result_pool));
2424  SVN_ERR(assert_equal_tags(&entry->tag, tag));
2425
2426#endif
2427
2428  /* update hit statistics
2429   */
2430  increment_hit_counters(cache, entry);
2431  *item_size = entry->size - entry->key.key_len;
2432
2433  return SVN_NO_ERROR;
2434}
2435
2436/* Look for the *ITEM identified by KEY. If no item has been stored
2437 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2438 * to re-construct the proper object from the serialized data.
2439 * Allocations will be done in POOL.
2440 */
2441static svn_error_t *
2442membuffer_cache_get(svn_membuffer_t *cache,
2443                    const full_key_t *key,
2444                    void **item,
2445                    svn_cache__deserialize_func_t deserializer,
2446                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2447                    apr_pool_t *result_pool)
2448{
2449  apr_uint32_t group_index;
2450  char *buffer;
2451  apr_size_t size;
2452
2453  /* find the entry group that will hold the key.
2454   */
2455  group_index = get_group_index(&cache, &key->entry_key);
2456  WITH_READ_LOCK(cache,
2457                 membuffer_cache_get_internal(cache,
2458                                              group_index,
2459                                              key,
2460                                              &buffer,
2461                                              &size,
2462                                              DEBUG_CACHE_MEMBUFFER_TAG
2463                                              result_pool));
2464
2465  /* re-construct the original data object from its serialized form.
2466   */
2467  if (buffer == NULL)
2468    {
2469      *item = NULL;
2470      return SVN_NO_ERROR;
2471    }
2472
2473  return deserializer(item, buffer, size, result_pool);
2474}
2475
2476/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2477 * by the hash value TO_FIND.  If no item has been stored for KEY, *FOUND
2478 * will be set to FALSE, and to TRUE otherwise.
2479 */
2480static svn_error_t *
2481membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2482                                 apr_uint32_t group_index,
2483                                 const full_key_t *to_find,
2484                                 svn_boolean_t *found)
2485{
2486  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2487  if (entry)
2488    {
2489      /* This will often be called by "block read" when most data is
2490         already in L2 and only a few previously evicted items are added
2491         to L1 again.  While items in L1 are well protected for a while,
2492         L2 items may get evicted soon.  Thus, mark all of them as "hit"
2493         to give them a higher chance of survival. */
2494      increment_hit_counters(cache, entry);
2495      *found = TRUE;
2496    }
2497  else
2498    {
2499      *found = FALSE;
2500    }
2501
2502  return SVN_NO_ERROR;
2503}
2504
2505/* Look for an entry identified by KEY.  If no item has been stored
2506 * for KEY, *FOUND will be set to FALSE, and to TRUE otherwise.
2507 */
2508/* Implements svn_cache__has_key for membuffer caches.
2509 */
2510static svn_error_t *
2511membuffer_cache_has_key(svn_membuffer_t *cache,
2512                        const full_key_t *key,
2513                        svn_boolean_t *found)
2514{
2515  /* find the entry group that will hold the key.
2516   */
2517  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2518  cache->total_reads++;
2519
2520  WITH_READ_LOCK(cache,
2521                 membuffer_cache_has_key_internal(cache,
2522                                                  group_index,
2523                                                  key,
2524                                                  found));
2525
2526  return SVN_NO_ERROR;
2527}
2528
2529/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2530 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2531 * If not found, *ITEM will be NULL.
2532 *
2533 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2534 * provided and will extract the desired information. The result is set
2535 * in *ITEM. Allocations will be done in POOL.
2536 *
2537 * Note: This function requires the caller to serialize access.
2538 * Don't call it directly, call membuffer_cache_get_partial instead.
2539 */
2540static svn_error_t *
2541membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2542                                     apr_uint32_t group_index,
2543                                     const full_key_t *to_find,
2544                                     void **item,
2545                                     svn_boolean_t *found,
2546                                     svn_cache__partial_getter_func_t deserializer,
2547                                     void *baton,
2548                                     DEBUG_CACHE_MEMBUFFER_TAG_ARG
2549                                     apr_pool_t *result_pool)
2550{
2551  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2552  cache->total_reads++;
2553  if (entry == NULL)
2554    {
2555      *item = NULL;
2556      *found = FALSE;
2557
2558      return SVN_NO_ERROR;
2559    }
2560  else
2561    {
2562      const void *item_data = cache->data + entry->offset + entry->key.key_len;
2563      apr_size_t item_size = entry->size - entry->key.key_len;
2564      *found = TRUE;
2565      increment_hit_counters(cache, entry);
2566
2567#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2568
2569      /* Check for overlapping entries.
2570       */
2571      SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2572                     entry->offset + entry->size
2573                        <= get_entry(cache, entry->next)->offset);
2574
2575      /* Compare original content, type and key (hashes)
2576       */
2577      SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2578      SVN_ERR(assert_equal_tags(&entry->tag, tag));
2579
2580#endif
2581
2582      return deserializer(item, item_data, item_size, baton, result_pool);
2583    }
2584}
2585
2586/* Look for the cache entry identified by KEY. FOUND indicates
2587 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2588 * the DESERIALIZER is called with that entry and the BATON provided
2589 * and will extract the desired information. The result is set in *ITEM.
2590 * Allocations will be done in POOL.
2591 */
2592static svn_error_t *
2593membuffer_cache_get_partial(svn_membuffer_t *cache,
2594                            const full_key_t *key,
2595                            void **item,
2596                            svn_boolean_t *found,
2597                            svn_cache__partial_getter_func_t deserializer,
2598                            void *baton,
2599                            DEBUG_CACHE_MEMBUFFER_TAG_ARG
2600                            apr_pool_t *result_pool)
2601{
2602  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2603
2604  WITH_READ_LOCK(cache,
2605                 membuffer_cache_get_partial_internal
2606                     (cache, group_index, key, item, found,
2607                      deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2608                      result_pool));
2609
2610  return SVN_NO_ERROR;
2611}
2612
2613/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2614 * by the hash value TO_FIND. If no entry has been found, the function
2615 * returns without modifying the cache.
2616 *
2617 * Otherwise, FUNC is called with that entry and the BATON provided
2618 * and may modify the cache entry. Allocations will be done in POOL.
2619 *
2620 * Note: This function requires the caller to serialize access.
2621 * Don't call it directly, call membuffer_cache_set_partial instead.
2622 */
2623static svn_error_t *
2624membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2625                                     apr_uint32_t group_index,
2626                                     const full_key_t *to_find,
2627                                     svn_cache__partial_setter_func_t func,
2628                                     void *baton,
2629                                     DEBUG_CACHE_MEMBUFFER_TAG_ARG
2630                                     apr_pool_t *scratch_pool)
2631{
2632  /* cache item lookup
2633   */
2634  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2635  cache->total_reads++;
2636
2637  /* this function is a no-op if the item is not in cache
2638   */
2639  if (entry != NULL)
2640    {
2641      svn_error_t *err;
2642
2643      /* access the serialized cache item */
2644      apr_size_t key_len = entry->key.key_len;
2645      void *item_data = cache->data + entry->offset + key_len;
2646      void *orig_data = item_data;
2647      apr_size_t item_size = entry->size - key_len;
2648
2649      increment_hit_counters(cache, entry);
2650      cache->total_writes++;
2651
2652#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2653
2654      /* Check for overlapping entries.
2655       */
2656      SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2657                     entry->offset + entry->size
2658                        <= get_entry(cache, entry->next)->offset);
2659
2660      /* Compare original content, type and key (hashes)
2661       */
2662      SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2663      SVN_ERR(assert_equal_tags(&entry->tag, tag));
2664
2665#endif
2666
2667      /* modify it, preferably in-situ.
2668       */
2669      err = func(&item_data, &item_size, baton, scratch_pool);
2670
2671      if (err)
2672        {
2673          /* Something somewhere went wrong while FUNC was modifying the
2674           * cached item. Thus, it might have become invalid / corrupted.
2675           * We had better drop it.
2676           */
2677          drop_entry(cache, entry);
2678
2679          return err;
2680        }
2681      else
2682        {
2683          /* if the modification caused a re-allocation, we need to remove
2684           * the old entry and to copy the new data back into cache.
2685           */
2686          if (item_data != orig_data)
2687            {
2688              /* Remove the old entry and try to make space for the new one.
2689               * Note that the key has already been stored in the past, i.e.
2690               * it is shorter than the MAX_ENTRY_SIZE.
2691               */
2692              drop_entry(cache, entry);
2693              if (   (cache->max_entry_size - key_len >= item_size)
2694                  && ensure_data_insertable_l1(cache, item_size + key_len))
2695                {
2696                  /* Write the new entry.
2697                   */
2698                  entry = find_entry(cache, group_index, to_find, TRUE);
2699                  entry->size = item_size + key_len;
2700                  entry->offset = cache->l1.current_data;
2701
2702                  if (key_len)
2703                    memcpy(cache->data + entry->offset,
2704                           to_find->full_key.data, key_len);
2705                  if (item_size)
2706                    memcpy(cache->data + entry->offset + key_len, item_data,
2707                           item_size);
2708
2709                  /* Link the entry properly.
2710                   */
2711                  insert_entry(cache, entry);
2712                }
2713            }
2714
2715#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2716
2717          /* Remember original content, type and key (hashes)
2718           */
2719          SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2720          memcpy(&entry->tag, tag, sizeof(*tag));
2721
2722#endif
2723        }
2724    }
2725
2726  return SVN_NO_ERROR;
2727}
2728
2729/* Look for the cache entry identified by KEY. If no entry
2730 * has been found, the function returns without modifying the cache.
2731 * Otherwise, FUNC is called with that entry and the BATON provided
2732 * and may modify the cache entry. Allocations will be done in POOL.
2733 */
2734static svn_error_t *
2735membuffer_cache_set_partial(svn_membuffer_t *cache,
2736                            const full_key_t *key,
2737                            svn_cache__partial_setter_func_t func,
2738                            void *baton,
2739                            DEBUG_CACHE_MEMBUFFER_TAG_ARG
2740                            apr_pool_t *scratch_pool)
2741{
2742  /* cache item lookup
2743   */
2744  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2745  WITH_WRITE_LOCK(cache,
2746                  membuffer_cache_set_partial_internal
2747                     (cache, group_index, key, func, baton,
2748                      DEBUG_CACHE_MEMBUFFER_TAG
2749                      scratch_pool));
2750
2751  /* done here -> unlock the cache
2752   */
2753  return SVN_NO_ERROR;
2754}
2755
2756/* Implement the svn_cache__t interface on top of a shared membuffer cache.
2757 *
2758 * Because membuffer caches tend to be very large, there will be rather few
2759 * of them (usually only one). Thus, the same instance shall be used as the
2760 * backend to many application-visible svn_cache__t instances. This should
2761 * also achieve global resource usage fairness.
2762 *
2763 * To accommodate items from multiple resources, the individual keys must be
2764 * unique over all sources. This is achieved by simply adding a prefix key
2765 * that unambiguously identifies the item's context (e.g. path to the
2766 * respective repository). The prefix will be set upon construction of the
2767 * svn_cache__t instance.
2768 */
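/* Note: front-end instances are normally not built by hand; callers go
 * through svn_cache__create_membuffer_cache() (declared in
 * private/svn_cache.h and implemented further down in this file), which
 * fills in the svn_membuffer_cache_t below and hooks it up to the vtable.
 */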
2769
2770/* Internal cache structure (used in svn_cache__t.cache_internal) basically
2771 * holding the additional parameters needed to call the respective membuffer
2772 * functions.
2773 */
2774typedef struct svn_membuffer_cache_t
2775{
2776  /* this is where all our data will end up
2777   */
2778  svn_membuffer_t *membuffer;
2779
2780  /* use this conversion function when inserting an item into the memcache
2781   */
2782  svn_cache__serialize_func_t serializer;
2783
2784  /* use this conversion function when reading an item from the memcache
2785   */
2786  svn_cache__deserialize_func_t deserializer;
2787
2788  /* Prepend this to any key passed to us.
2789   * This makes our keys different from all keys used by svn_membuffer_cache_t
2790   * instances that we don't want to share cached data with.
2791   */
2792  entry_key_t prefix;
2793
2794  /* length of the keys that will be passed to us through the
2795   * svn_cache_t interface. May be APR_HASH_KEY_STRING.
2796   */
2797  apr_ssize_t key_len;
2798
2799  /* priority class for all items written through this interface */
2800  apr_uint32_t priority;
2801
2802  /* Temporary buffer containing the hash key for the current access
2803   */
2804  full_key_t combined_key;
2805
2806  /* if enabled, this will serialize the access to this instance.
2807   */
2808  svn_mutex__t *mutex;
2809} svn_membuffer_cache_t;
2810
2811/* Return the prefix key used by CACHE. */
2812static const char *
2813get_prefix_key(const svn_membuffer_cache_t *cache)
2814{
2815  return (cache->prefix.prefix_idx == NO_INDEX
2816       ? cache->combined_key.full_key.data
2817       : cache->membuffer->prefix_pool->values[cache->prefix.prefix_idx]);
2818}
2819
2820/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2821 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2822 * This could replace combine_key() entirely but we actually use it only
2823 * when the quick path fails.
2824 */
2825static void
2826combine_long_key(svn_membuffer_cache_t *cache,
2827                 const void *key,
2828                 apr_ssize_t key_len)
2829{
2830  apr_uint32_t *digest_buffer;
2831  char *key_copy;
2832  apr_size_t prefix_len = cache->prefix.key_len;
2833  apr_size_t aligned_key_len;
2834
2835  /* handle variable-length keys */
2836  if (key_len == APR_HASH_KEY_STRING)
2837    key_len = strlen((const char *) key);
2838
2839  aligned_key_len = ALIGN_VALUE(key_len);
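  /* Illustration (assuming ITEM_ALIGNMENT were 16): a 13-byte key yields
   * ALIGNED_KEY_LEN == 16; the three trailing bytes are zeroed by the
   * memset below so that equal keys always produce identical padded
   * buffers. */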
2840
2841  /* Combine keys. */
2842  svn_membuf__ensure(&cache->combined_key.full_key,
2843                     aligned_key_len + prefix_len);
2844
2845  key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2846  cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2847  memcpy(key_copy, key, key_len);
2848  memset(key_copy + key_len, 0, aligned_key_len - key_len);
2849
2850  /* Hash key into 16 bytes. */
2851  digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2852  svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2853
2854  /* Combine with prefix. */
2855  cache->combined_key.entry_key.fingerprint[0]
2856    ^= cache->prefix.fingerprint[0];
2857  cache->combined_key.entry_key.fingerprint[1]
2858    ^= cache->prefix.fingerprint[1];
2859}
2860
2861/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2862 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2863 */
2864static void
2865combine_key(svn_membuffer_cache_t *cache,
2866            const void *key,
2867            apr_ssize_t key_len)
2868{
2869  /* copy of *key, padded with 0 */
2870  apr_uint64_t data[2];
2871
2872  /* Do we have to compare full keys? */
2873  if (cache->prefix.prefix_idx == NO_INDEX)
2874    {
2875      combine_long_key(cache, key, key_len);
2876      return;
2877    }
2878
2879  /* short, fixed-size keys are the most common case */
2880  if (key_len == 16)
2881    {
2882      memcpy(data, key, 16);
2883    }
2884  else if (key_len == 8)
2885    {
2886      memcpy(data, key, 8);
2887      data[1] = 0;
2888    }
2889  else
2890    {
2891      assert(key_len != APR_HASH_KEY_STRING && key_len < 16);
2892      data[0] = 0;
2893      data[1] = 0;
2894      memcpy(data, key, key_len);
2895    }
2896
2897  /* Scramble key DATA to spread the key space more evenly across the
2898   * cache segments and entry buckets.  All of this shall be reversible
2899   * to prevent key collisions.  So, we limit ourselves to xor and
2900   * permutations.
2901   *
2902   * Since the entry key must preserve the full key (prefix and KEY),
2903   * the scramble must not introduce KEY collisions.
2904   */
2905  data[1] = (data[1] << 27) | (data[1] >> 37);
2906  data[1] ^= data[0] & 0xffff;
2907  data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
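  /* Informal sketch of the reversibility claim: undoing the two xors in
   * reverse order and then rotating DATA[1] right by 27 bits recovers the
   * original key bytes, so no two distinct KEYs can be scrambled onto the
   * same DATA pair. */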
2908
2909  /* Combine with this cache's prefix.  This is reversible because the
2910 * prefix is known through the respective entry_key element.  So,
2911   * knowing entry_key.prefix_id, we can still reconstruct KEY (and the
2912   * prefix key).
2913   */
2914  cache->combined_key.entry_key.fingerprint[0]
2915    = data[0] ^ cache->prefix.fingerprint[0];
2916  cache->combined_key.entry_key.fingerprint[1]
2917    = data[1] ^ cache->prefix.fingerprint[1];
2918}
2919
2920/* Implement svn_cache__vtable_t.get (not thread-safe)
2921 */
2922static svn_error_t *
2923svn_membuffer_cache_get(void **value_p,
2924                        svn_boolean_t *found,
2925                        void *cache_void,
2926                        const void *key,
2927                        apr_pool_t *result_pool)
2928{
2929  svn_membuffer_cache_t *cache = cache_void;
2930
2931  DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2932
2933  /* special case */
2934  if (key == NULL)
2935    {
2936      *value_p = NULL;
2937      *found = FALSE;
2938
2939      return SVN_NO_ERROR;
2940    }
2941
2942  /* construct the full, i.e. globally unique, key by adding
2943   * this cache instance's prefix
2944   */
2945  combine_key(cache, key, cache->key_len);
2946
2947  /* Look the item up. */
2948  SVN_ERR(membuffer_cache_get(cache->membuffer,
2949                              &cache->combined_key,
2950                              value_p,
2951                              cache->deserializer,
2952                              DEBUG_CACHE_MEMBUFFER_TAG
2953                              result_pool));
2954
2955  /* return result */
2956  *found = *value_p != NULL;
2957
2958  return SVN_NO_ERROR;
2959}
2960
2961/* Implement svn_cache__vtable_t.has_key (not thread-safe)
2962 */
2963static svn_error_t *
2964svn_membuffer_cache_has_key(svn_boolean_t *found,
2965                            void *cache_void,
2966                            const void *key,
2967                            apr_pool_t *scratch_pool)
2968{
2969  svn_membuffer_cache_t *cache = cache_void;
2970
2971  /* special case */
2972  if (key == NULL)
2973    {
2974      *found = FALSE;
2975
2976      return SVN_NO_ERROR;
2977    }
2978
2979  /* construct the full, i.e. globally unique, key by adding
2980   * this cache instance's prefix
2981   */
2982  combine_key(cache, key, cache->key_len);
2983
2984  /* Look the item up. */
2985  SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2986                                  &cache->combined_key,
2987                                  found));
2988
2989  /* return result */
2990  return SVN_NO_ERROR;
2991}
2992
2993/* Implement svn_cache__vtable_t.set (not thread-safe)
2994 */
2995static svn_error_t *
2996svn_membuffer_cache_set(void *cache_void,
2997                        const void *key,
2998                        void *value,
2999                        apr_pool_t *scratch_pool)
3000{
3001  svn_membuffer_cache_t *cache = cache_void;
3002
3003  DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
3004
3005  /* special case */
3006  if (key == NULL)
3007    return SVN_NO_ERROR;
3008
3009  /* construct the full, i.e. globally unique, key by adding
3010   * this cache instance's prefix
3011   */
3012  combine_key(cache, key, cache->key_len);
3013
3014  /* (probably) add the item to the cache. But there is no real guarantee
3015   * that the item will actually be cached afterwards.
3016   */
3017  return membuffer_cache_set(cache->membuffer,
3018                             &cache->combined_key,
3019                             value,
3020                             cache->serializer,
3021                             cache->priority,
3022                             DEBUG_CACHE_MEMBUFFER_TAG
3023                             scratch_pool);
3024}
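
/* Note on the "no real guarantee" above: insertion into the membuffer is
 * best effort, so a set immediately followed by a get may still miss
 * (for instance because the item was rejected as too large or was
 * evicted right away).  Illustrative sketch only, using the public
 * wrappers and hypothetical variables:
 *
 *   SVN_ERR(svn_cache__set(cache, "key", value, scratch_pool));
 *   SVN_ERR(svn_cache__get(&copy, &found, cache, "key", result_pool));
 *
 * FOUND may legitimately be FALSE here; callers must treat the cache as
 * an optimization, never as authoritative storage.
 */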
3025
3026/* Implement svn_cache__vtable_t.iter as "not implemented"
3027 */
3028static svn_error_t *
3029svn_membuffer_cache_iter(svn_boolean_t *completed,
3030                          void *cache_void,
3031                          svn_iter_apr_hash_cb_t user_cb,
3032                          void *user_baton,
3033                          apr_pool_t *scratch_pool)
3034{
3035  return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
3036                          _("Can't iterate a membuffer-based cache"));
3037}
3038
3039/* Implement svn_cache__vtable_t.get_partial (not thread-safe)
3040 */
3041static svn_error_t *
3042svn_membuffer_cache_get_partial(void **value_p,
3043                                svn_boolean_t *found,
3044                                void *cache_void,
3045                                const void *key,
3046                                svn_cache__partial_getter_func_t func,
3047                                void *baton,
3048                                apr_pool_t *result_pool)
3049{
3050  svn_membuffer_cache_t *cache = cache_void;
3051
3052  DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
3053
3054  if (key == NULL)
3055    {
3056      *value_p = NULL;
3057      *found = FALSE;
3058
3059      return SVN_NO_ERROR;
3060    }
3061
3062  combine_key(cache, key, cache->key_len);
3063  SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
3064                                      &cache->combined_key,
3065                                      value_p,
3066                                      found,
3067                                      func,
3068                                      baton,
3069                                      DEBUG_CACHE_MEMBUFFER_TAG
3070                                      result_pool));
3071
3072  return SVN_NO_ERROR;
3073}
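
/* Sketch of a partial getter as passed in as FUNC above: it receives the
 * serialized buffer directly and extracts just the piece it needs,
 * avoiding a full deserialization.  This assumes the
 * svn_cache__partial_getter_func_t signature declared in svn_cache.h;
 * the "first byte" extraction is purely illustrative:
 *
 *   static svn_error_t *
 *   get_first_byte(void **out, const void *data, apr_size_t data_len,
 *                  void *baton, apr_pool_t *result_pool)
 *   {
 *     unsigned char *result = apr_palloc(result_pool, 1);
 *     *result = data_len ? ((const unsigned char *)data)[0] : 0;
 *     *out = result;
 *     return SVN_NO_ERROR;
 *   }
 */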
3074
3075/* Implement svn_cache__vtable_t.set_partial (not thread-safe)
3076 */
3077static svn_error_t *
3078svn_membuffer_cache_set_partial(void *cache_void,
3079                                const void *key,
3080                                svn_cache__partial_setter_func_t func,
3081                                void *baton,
3082                                apr_pool_t *scratch_pool)
3083{
3084  svn_membuffer_cache_t *cache = cache_void;
3085
3086  DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
3087
3088  if (key != NULL)
3089    {
3090      combine_key(cache, key, cache->key_len);
3091      SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
3092                                          &cache->combined_key,
3093                                          func,
3094                                          baton,
3095                                          DEBUG_CACHE_MEMBUFFER_TAG
3096                                          scratch_pool));
3097    }
3098  return SVN_NO_ERROR;
3099}
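
/* Counterpart sketch for the partial setter FUNC above: it may modify the
 * serialized representation in place, or replace it by updating *DATA and
 * *DATA_LEN.  This assumes the svn_cache__partial_setter_func_t signature
 * declared in svn_cache.h; patching a single byte is illustrative only:
 *
 *   static svn_error_t *
 *   patch_first_byte(void **data, apr_size_t *data_len,
 *                    void *baton, apr_pool_t *result_pool)
 *   {
 *     if (*data_len > 0)
 *       ((unsigned char *)*data)[0] = *(unsigned char *)baton;
 *     return SVN_NO_ERROR;
 *   }
 */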
3100
3101/* Implement svn_cache__vtable_t.is_cachable
3102 * (thread-safe even without mutex)
3103 */
3104static svn_boolean_t
3105svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
3106{
3107  /* Don't allow extremely large element sizes. Otherwise, the cache
3108   * might by thrashed by a few extremely large entries. And the size
3109   * must be small enough to be stored in a 32 bit value.
3110   */
3111  svn_membuffer_cache_t *cache = cache_void;
3112  return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
3113       ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
3114       : size <= cache->membuffer->max_entry_size;
3115}
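
/* Typical caller-side use of the size check above (illustrative only,
 * assuming the public svn_cache__is_cachable() wrapper): decide up front
 * whether serializing and storing an item is worthwhile at all.
 *
 *   if (svn_cache__is_cachable(cache, estimated_size))
 *     SVN_ERR(svn_cache__set(cache, key, value, scratch_pool));
 *
 * Items above the applicable limit (max_entry_size for default priority,
 * L2 size and MAX_ITEM_SIZE for higher priorities) are simply not cached.
 */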
3116
3117/* Add statistics of SEGMENT to INFO.  If INCLUDE_HISTOGRAM is TRUE,
3118 * accumulate index bucket fill levels in INFO->HISTOGRAM.
3119 */
3120static svn_error_t *
3121svn_membuffer_get_segment_info(svn_membuffer_t *segment,
3122                               svn_cache__info_t *info,
3123                               svn_boolean_t include_histogram)
3124{
3125  apr_uint32_t i;
3126
3127  info->data_size += segment->l1.size + segment->l2.size;
3128  info->used_size += segment->data_used;
3129  info->total_size += segment->l1.size + segment->l2.size +
3130      segment->group_count * GROUP_SIZE * sizeof(entry_t);
3131
3132  info->used_entries += segment->used_entries;
3133  info->total_entries += segment->group_count * GROUP_SIZE;
3134
3135  if (include_histogram)
3136    for (i = 0; i < segment->group_count; ++i)
3137      if (is_group_initialized(segment, i))
3138        {
3139          entry_group_t *chain_end
3140            = last_group_in_chain(segment, &segment->directory[i]);
3141          apr_size_t use
3142            = MIN(chain_end->header.used,
3143                  sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
3144          info->histogram[use]++;
3145        }
3146
3147  return SVN_NO_ERROR;
3148}
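
/* Histogram note (illustrative): because of the MIN() clamp above, the
 * last bucket absorbs every chain-end fill level at or beyond the array
 * bound.  Assuming a hypothetical 16-element histogram:
 *
 *   chain_end->header.used ==  3   =>  info->histogram[3]++
 *   chain_end->header.used == 40   =>  info->histogram[15]++
 */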
3149
3150/* Implement svn_cache__vtable_t.get_info
3151 * (thread-safe even without mutex)
3152 */
3153static svn_error_t *
3154svn_membuffer_cache_get_info(void *cache_void,
3155                             svn_cache__info_t *info,
3156                             svn_boolean_t reset,
3157                             apr_pool_t *result_pool)
3158{
3159  svn_membuffer_cache_t *cache = cache_void;
3160  apr_uint32_t i;
3161
3162  /* cache front-end specific data */
3163
3164  info->id = apr_pstrdup(result_pool, get_prefix_key(cache));
3165
3166  /* collect info from shared cache back-end */
3167
3168  for (i = 0; i < cache->membuffer->segment_count; ++i)
3169    {
3170      svn_membuffer_t *segment = cache->membuffer + i;
3171      WITH_READ_LOCK(segment,
3172                     svn_membuffer_get_segment_info(segment, info, FALSE));
3173    }
3174
3175  return SVN_NO_ERROR;
3176}
3177
3178
3179/* the v-table for membuffer-based caches (single-threaded access)
3180 */
3181static svn_cache__vtable_t membuffer_cache_vtable = {
3182  svn_membuffer_cache_get,
3183  svn_membuffer_cache_has_key,
3184  svn_membuffer_cache_set,
3185  svn_membuffer_cache_iter,
3186  svn_membuffer_cache_is_cachable,
3187  svn_membuffer_cache_get_partial,
3188  svn_membuffer_cache_set_partial,
3189  svn_membuffer_cache_get_info
3190};
3191
3192/* Implement svn_cache__vtable_t.get and serialize all cache access.
3193 */
3194static svn_error_t *
3195svn_membuffer_cache_get_synced(void **value_p,
3196                               svn_boolean_t *found,
3197                               void *cache_void,
3198                               const void *key,
3199                               apr_pool_t *result_pool)
3200{
3201  svn_membuffer_cache_t *cache = cache_void;
3202  SVN_MUTEX__WITH_LOCK(cache->mutex,
3203                       svn_membuffer_cache_get(value_p,
3204                                               found,
3205                                               cache_void,
3206                                               key,
3207                                               result_pool));
3208
3209  return SVN_NO_ERROR;
3210}
3211
3212/* Implement svn_cache__vtable_t.has_key and serialize all cache access.
3213 */
3214static svn_error_t *
3215svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
3216                                   void *cache_void,
3217                                   const void *key,
3218                                   apr_pool_t *result_pool)
3219{
3220  svn_membuffer_cache_t *cache = cache_void;
3221  SVN_MUTEX__WITH_LOCK(cache->mutex,
3222                       svn_membuffer_cache_has_key(found,
3223                                                   cache_void,
3224                                                   key,
3225                                                   result_pool));
3226
3227  return SVN_NO_ERROR;
3228}
3229
3230/* Implement svn_cache__vtable_t.set and serialize all cache access.
3231 */
3232static svn_error_t *
3233svn_membuffer_cache_set_synced(void *cache_void,
3234                               const void *key,
3235                               void *value,
3236                               apr_pool_t *scratch_pool)
3237{
3238  svn_membuffer_cache_t *cache = cache_void;
3239  SVN_MUTEX__WITH_LOCK(cache->mutex,
3240                       svn_membuffer_cache_set(cache_void,
3241                                               key,
3242                                               value,
3243                                               scratch_pool));
3244
3245  return SVN_NO_ERROR;
3246}
3247
3248/* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3249 */
3250static svn_error_t *
3251svn_membuffer_cache_get_partial_synced(void **value_p,
3252                                       svn_boolean_t *found,
3253                                       void *cache_void,
3254                                       const void *key,
3255                                       svn_cache__partial_getter_func_t func,
3256                                       void *baton,
3257                                       apr_pool_t *result_pool)
3258{
3259  svn_membuffer_cache_t *cache = cache_void;
3260  SVN_MUTEX__WITH_LOCK(cache->mutex,
3261                       svn_membuffer_cache_get_partial(value_p,
3262                                                       found,
3263                                                       cache_void,
3264                                                       key,
3265                                                       func,
3266                                                       baton,
3267                                                       result_pool));
3268
3269  return SVN_NO_ERROR;
3270}
3271
3272/* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3273 */
3274static svn_error_t *
3275svn_membuffer_cache_set_partial_synced(void *cache_void,
3276                                       const void *key,
3277                                       svn_cache__partial_setter_func_t func,
3278                                       void *baton,
3279                                       apr_pool_t *scratch_pool)
3280{
3281  svn_membuffer_cache_t *cache = cache_void;
3282  SVN_MUTEX__WITH_LOCK(cache->mutex,
3283                       svn_membuffer_cache_set_partial(cache_void,
3284                                                       key,
3285                                                       func,
3286                                                       baton,
3287                                                       scratch_pool));
3288
3289  return SVN_NO_ERROR;
3290}
3291
3292/* the v-table for membuffer-based caches (multi-threaded access)
3293 */
3294static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3295  svn_membuffer_cache_get_synced,
3296  svn_membuffer_cache_has_key_synced,
3297  svn_membuffer_cache_set_synced,
3298  svn_membuffer_cache_iter,               /* no sync required */
3299  svn_membuffer_cache_is_cachable,        /* no sync required */
3300  svn_membuffer_cache_get_partial_synced,
3301  svn_membuffer_cache_set_partial_synced,
3302  svn_membuffer_cache_get_info            /* no sync required */
3303};
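
/* All *_synced wrappers above follow the same pattern: take the
 * front-end's mutex, delegate to the unsynchronized implementation, and
 * propagate its error.  A minimal sketch of that pattern, assuming that
 * SVN_MUTEX__WITH_LOCK (private/svn_mutex.h) evaluates the expression
 * while holding the lock; CLEAR_UNSYNCED is a hypothetical operation:
 *
 *   static svn_error_t *
 *   clear_synced(void *cache_void)
 *   {
 *     svn_membuffer_cache_t *cache = cache_void;
 *     SVN_MUTEX__WITH_LOCK(cache->mutex, clear_unsynced(cache_void));
 *     return SVN_NO_ERROR;
 *   }
 */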
3304
3305/* standard serialization function for svn_stringbuf_t items.
3306 * Implements svn_cache__serialize_func_t.
3307 */
3308static svn_error_t *
3309serialize_svn_stringbuf(void **buffer,
3310                        apr_size_t *buffer_size,
3311                        void *item,
3312                        apr_pool_t *result_pool)
3313{
3314  svn_stringbuf_t *value_str = item;
3315
3316  *buffer = value_str->data;
3317  *buffer_size = value_str->len + 1;
3318
3319  return SVN_NO_ERROR;
3320}
3321
3322/* standard de-serialization function for svn_stringbuf_t items.
3323 * Implements svn_cache__deserialize_func_t.
3324 */
3325static svn_error_t *
3326deserialize_svn_stringbuf(void **item,
3327                          void *buffer,
3328                          apr_size_t buffer_size,
3329                          apr_pool_t *result_pool)
3330{
3331  svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3332
3333  value_str->pool = result_pool;
3334  value_str->blocksize = buffer_size;
3335  value_str->data = buffer;
3336  value_str->len = buffer_size-1;
3337  *item = value_str;
3338
3339  return SVN_NO_ERROR;
3340}
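
/* Sketch of a custom serializer / deserializer pair following the same
 * two signatures as the svn_stringbuf_t defaults above.  The POINT_T
 * struct and function names are hypothetical; SERIALIZE_POINT can hand
 * out the struct directly because it contains no pointers, and
 * DESERIALIZE_POINT can reuse the buffer because the cache returns a
 * private copy.  Real implementations must also take care of embedded
 * pointers and alignment:
 *
 *   typedef struct point_t { apr_int64_t x, y; } point_t;
 *
 *   static svn_error_t *
 *   serialize_point(void **buffer, apr_size_t *buffer_size,
 *                   void *item, apr_pool_t *result_pool)
 *   {
 *     *buffer = item;
 *     *buffer_size = sizeof(point_t);
 *     return SVN_NO_ERROR;
 *   }
 *
 *   static svn_error_t *
 *   deserialize_point(void **item, void *buffer, apr_size_t buffer_size,
 *                     apr_pool_t *result_pool)
 *   {
 *     *item = buffer;
 *     return SVN_NO_ERROR;
 *   }
 */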
3341
3342/* Construct an svn_cache__t object on top of a shared membuffer cache.
3343 */
3344svn_error_t *
3345svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3346                                  svn_membuffer_t *membuffer,
3347                                  svn_cache__serialize_func_t serializer,
3348                                  svn_cache__deserialize_func_t deserializer,
3349                                  apr_ssize_t klen,
3350                                  const char *prefix,
3351                                  apr_uint32_t priority,
3352                                  svn_boolean_t thread_safe,
3353                                  svn_boolean_t short_lived,
3354                                  apr_pool_t *result_pool,
3355                                  apr_pool_t *scratch_pool)
3356{
3357  svn_checksum_t *checksum;
3358  apr_size_t prefix_len, prefix_orig_len;
3359
3360  /* allocate the cache header structures
3361   */
3362  svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3363  svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3364
3365  /* initialize our internal cache header
3366   */
3367  cache->membuffer = membuffer;
3368  cache->serializer = serializer
3369                    ? serializer
3370                    : serialize_svn_stringbuf;
3371  cache->deserializer = deserializer
3372                      ? deserializer
3373                      : deserialize_svn_stringbuf;
3374  cache->priority = priority;
3375  cache->key_len = klen;
3376
3377  SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3378
3379  /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3380   * Don't forget to include the terminating NUL. */
3381  prefix_orig_len = strlen(prefix) + 1;
3382  prefix_len = ALIGN_VALUE(prefix_orig_len);
3383
3384  /* Paranoia check to ensure pointer arithmetic works as expected. */
3385  if (prefix_orig_len >= SVN_MAX_OBJECT_SIZE)
3386    return svn_error_create(SVN_ERR_INCORRECT_PARAMS, NULL,
3387                            _("Prefix too long"));
3388
3389  /* Construct the folded prefix key. */
3390  SVN_ERR(svn_checksum(&checksum,
3391                       svn_checksum_md5,
3392                       prefix,
3393                       strlen(prefix),
3394                       scratch_pool));
3395  memcpy(cache->prefix.fingerprint, checksum->digest,
3396         sizeof(cache->prefix.fingerprint));
3397  cache->prefix.key_len = prefix_len;
3398
3399  /* Fixed-length keys of up to 16 bytes may be handled without storing the
3400   * full key separately for each item. */
3401  if (   (klen != APR_HASH_KEY_STRING)
3402      && (klen <= sizeof(cache->combined_key.entry_key.fingerprint))
3403      && !short_lived)
3404    SVN_ERR(prefix_pool_get(&cache->prefix.prefix_idx,
3405                            membuffer->prefix_pool,
3406                            prefix));
3407  else
3408    cache->prefix.prefix_idx = NO_INDEX;
3409
3410  /* If key combining is not guaranteed to produce unique results, we have
3411   * to handle full keys.  Otherwise, leave the full key NULL. */
3412  if (cache->prefix.prefix_idx == NO_INDEX)
3413    {
3414      /* Initialize the combined key. Pre-allocate some extra room in the
3415       * full key such that we probably don't need to re-alloc. */
3416      cache->combined_key.entry_key = cache->prefix;
3417      svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3418                         result_pool);
3419      memcpy((char *)cache->combined_key.full_key.data, prefix,
3420             prefix_orig_len);
3421      memset((char *)cache->combined_key.full_key.data + prefix_orig_len, 0,
3422             prefix_len - prefix_orig_len);
3423    }
3424  else
3425    {
3426      /* Initialize the combined key.  We will never have the full combined
3427       * key, so leave it NULL and set its length to 0 to prevent access to
3428   * it.  Keep the fingerprint 0 as well because it will always be set anew
3429       * by combine_key(). */
3430      cache->combined_key.entry_key.prefix_idx = cache->prefix.prefix_idx;
3431      cache->combined_key.entry_key.key_len = 0;
3432    }
3433
3434  /* initialize the generic cache wrapper
3435   */
3436  wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3437                                : &membuffer_cache_vtable;
3438  wrapper->cache_internal = cache;
3439  wrapper->error_handler = 0;
3440  wrapper->error_baton = 0;
3441  wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3442
3443  *cache_p = wrapper;
3444  return SVN_NO_ERROR;
3445}
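
/* Usage sketch (illustrative): creating a front-end on the process-wide
 * membuffer with the default svn_stringbuf_t (de-)serializers, i.e.
 * passing NULL for both callbacks.  The prefix string and the pools are
 * hypothetical; APR_HASH_KEY_STRING requests NUL-terminated keys:
 *
 *   svn_cache__t *my_cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &my_cache,
 *             svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL,
 *             APR_HASH_KEY_STRING,
 *             "my-namespace:",
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
 *             TRUE,
 *             FALSE,
 *             result_pool, scratch_pool));
 *
 * TRUE selects the synchronized vtable; with APR_HASH_KEY_STRING keys the
 * full key is kept alongside each item regardless of SHORT_LIVED.
 */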
3446
3447static svn_error_t *
3448svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3449                                      svn_cache__info_t *info)
3450{
3451  info->gets += segment->total_reads;
3452  info->sets += segment->total_writes;
3453  info->hits += segment->total_hits;
3454
3455  WITH_READ_LOCK(segment,
3456                  svn_membuffer_get_segment_info(segment, info, TRUE));
3457
3458  return SVN_NO_ERROR;
3459}
3460
3461svn_cache__info_t *
3462svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3463{
3464  apr_uint32_t i;
3465
3466  svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3467  svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3468
3469  /* cache front-end specific data */
3470
3471  info->id = "membuffer globals";
3472
3473  /* collect info from shared cache back-end */
3474
3475  for (i = 0; i < membuffer->segment_count; ++i)
3476    svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,
3477                                                          info));
3478
3479  return info;
3480}
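
/* Usage sketch (illustrative): reading the aggregated statistics, e.g.
 * for a hit-rate log line.  The field names match those populated above;
 * the format string assumes the counters are apr_uint64_t:
 *
 *   svn_cache__info_t *info = svn_cache__membuffer_get_global_info(pool);
 *   printf("membuffer: %" APR_UINT64_T_FMT " gets, "
 *          "%" APR_UINT64_T_FMT " hits, "
 *          "%" APR_UINT64_T_FMT "/%" APR_UINT64_T_FMT " bytes used\n",
 *          info->gets, info->hits, info->used_size, info->data_size);
 */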
3481