1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr_general.h"
18
19#include "mod_cache.h"
20#include "cache_hash.h"
21
22#if APR_HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#if APR_HAVE_STRING_H
26#include <string.h>
27#endif
28
29
30/*
31 * The internal form of a hash table.
32 *
33 * The table is an array indexed by the hash of the key; collisions
34 * are resolved by hanging a linked list of hash entries off each
35 * element of the array. Although this is a really simple design it
36 * isn't too bad given that pools have a low allocation overhead.
37 */
38
39typedef struct cache_hash_entry_t cache_hash_entry_t;
40
41struct cache_hash_entry_t {
42    cache_hash_entry_t   *next;
43    unsigned int         hash;
44    const void           *key;
45    apr_ssize_t          klen;
46    const void           *val;
47};
48
49/*
50 * Data structure for iterating through a hash table.
51 *
52 * We keep a pointer to the next hash entry here to allow the current
53 * hash entry to be freed or otherwise mangled between calls to
54 * cache_hash_next().
55 */
56struct cache_hash_index_t {
57    cache_hash_t         *ht;
58    cache_hash_entry_t   *this, *next;
59    int                  index;
60};
61
62/*
63 * The size of the array is always a power of two. We use the maximum
64 * index rather than the size so that we can use bitwise-AND for
65 * modular arithmetic.
66 * The count of hash entries may be greater depending on the chosen
67 * collision rate.
68 */
69struct cache_hash_t {
70    cache_hash_entry_t   **array;
71    cache_hash_index_t     iterator;  /* For cache_hash_first(NULL, ...) */
72    int                  count, max;
73};
74
75/*
76 * Hash creation functions.
77 */
78static cache_hash_entry_t **alloc_array(cache_hash_t *ht, int max)
79{
80   return calloc(1, sizeof(*ht->array) * (max + 1));
81}
82
83cache_hash_t* cache_hash_make(apr_size_t size)
84{
85    cache_hash_t *ht;
86    ht = malloc(sizeof(cache_hash_t));
87    if (!ht) {
88        return NULL;
89    }
90    ht->count = 0;
91    ht->max = size;
92    ht->array = alloc_array(ht, ht->max);
93    if (!ht->array) {
94        free(ht);
95        return NULL;
96    }
97    return ht;
98}
99
100void cache_hash_free(cache_hash_t *ht)
101{
102    if (ht) {
103        if (ht->array) {
104            free (ht->array);
105        }
106        free (ht);
107    }
108}
109/*
110 * Hash iteration functions.
111 */
112
113cache_hash_index_t* cache_hash_next(cache_hash_index_t *hi)
114{
115    hi->this = hi->next;
116    while (!hi->this) {
117        if (hi->index > hi->ht->max)
118            return NULL;
119        hi->this = hi->ht->array[hi->index++];
120    }
121    hi->next = hi->this->next;
122    return hi;
123}
124
125cache_hash_index_t* cache_hash_first(cache_hash_t *ht)
126{
127    cache_hash_index_t *hi;
128
129    hi = &ht->iterator;
130    hi->ht = ht;
131    hi->index = 0;
132    hi->this = NULL;
133    hi->next = NULL;
134    return cache_hash_next(hi);
135}
136
137void cache_hash_this(cache_hash_index_t *hi,
138                                  const void **key,
139                                  apr_ssize_t *klen,
140                                  void **val)
141{
142    if (key)  *key  = hi->this->key;
143    if (klen) *klen = hi->this->klen;
144    if (val)  *val  = (void *)hi->this->val;
145}
146
147
148/*
149 * This is where we keep the details of the hash function and control
150 * the maximum collision rate.
151 *
152 * If val is non-NULL it creates and initializes a new hash entry if
153 * there isn't already one there; it returns an updatable pointer so
154 * that hash entries can be removed.
155 */
156
157static cache_hash_entry_t **find_entry(cache_hash_t *ht,
158                                       const void *key,
159                                       apr_ssize_t klen,
160                                       const void *val)
161{
162    cache_hash_entry_t **hep, *he;
163    const unsigned char *p;
164    unsigned int hash;
165    apr_ssize_t i;
166
167    /*
168     * This is the popular `times 33' hash algorithm which is used by
169     * perl and also appears in Berkeley DB. This is one of the best
170     * known hash functions for strings because it is both computed
171     * very fast and distributes very well.
172     *
173     * The originator may be Dan Bernstein but the code in Berkeley DB
174     * cites Chris Torek as the source. The best citation I have found
175     * is "Chris Torek, Hash function for text in C, Usenet message
176     * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
177     * Salz's USENIX 1992 paper about INN which can be found at
178     * <http://citeseer.nj.nec.com/salz92internetnews.html>.
179     *
180     * The magic of number 33, i.e. why it works better than many other
181     * constants, prime or not, has never been adequately explained by
182     * anyone. So I try an explanation: if one experimentally tests all
183     * multipliers between 1 and 256 (as I did while writing a low-level
184     * data structure library some time ago) one detects that even
185     * numbers are not useable at all. The remaining 128 odd numbers
186     * (except for the number 1) work more or less all equally well.
187     * They all distribute in an acceptable way and this way fill a hash
188     * table with an average percent of approx. 86%.
189     *
190     * If one compares the chi^2 values of the variants (see
191     * Bob Jenkins ``Hashing Frequently Asked Questions'' at
192     * http://burtleburtle.net/bob/hash/hashfaq.html for a description
193     * of chi^2), the number 33 not even has the best value. But the
194     * number 33 and a few other equally good numbers like 17, 31, 63,
195     * 127 and 129 have nevertheless a great advantage to the remaining
196     * numbers in the large set of possible multipliers: their multiply
197     * operation can be replaced by a faster operation based on just one
198     * shift plus either a single addition or subtraction operation. And
199     * because a hash function has to both distribute good _and_ has to
200     * be very fast to compute, those few numbers should be preferred.
201     *
202     *                  -- Ralf S. Engelschall <rse@engelschall.com>
203     */
204    hash = 0;
205    if (klen == CACHE_HASH_KEY_STRING) {
206        for (p = key; *p; p++) {
207            hash = hash * 33 + *p;
208        }
209        klen = p - (const unsigned char *)key;
210    }
211    else {
212        for (p = key, i = klen; i; i--, p++) {
213            hash = hash * 33 + *p;
214        }
215    }
216
217    /* scan linked list */
218    for (hep = &ht->array[hash % ht->max], he = *hep;
219         he;
220         hep = &he->next, he = *hep) {
221        if (he->hash == hash &&
222            he->klen == klen &&
223            memcmp(he->key, key, klen) == 0)
224            break;
225    }
226    if (he || !val)
227        return hep;
228    /* add a new entry for non-NULL values */
229    he = malloc(sizeof(*he));
230    if (!he) {
231        return NULL;
232    }
233    he->next = NULL;
234    he->hash = hash;
235    he->key  = key;
236    he->klen = klen;
237    he->val  = val;
238    *hep = he;
239    ht->count++;
240    return hep;
241}
242
243void* cache_hash_get(cache_hash_t *ht,
244                                   const void *key,
245                                   apr_ssize_t klen)
246{
247    cache_hash_entry_t *he;
248    he = *find_entry(ht, key, klen, NULL);
249    if (he)
250        return (void *)he->val;
251    else
252        return NULL;
253}
254
255void* cache_hash_set(cache_hash_t *ht,
256                                     const void *key,
257                                     apr_ssize_t klen,
258                                     const void *val)
259{
260    cache_hash_entry_t **hep, *tmp;
261    const void *tval;
262    hep = find_entry(ht, key, klen, val);
263    /* If hep == NULL, then the malloc() in find_entry failed */
264    if (hep && *hep) {
265        if (!val) {
266            /* delete entry */
267            tval = (*hep)->val;
268            tmp = *hep;
269            *hep = (*hep)->next;
270            free(tmp);
271            --ht->count;
272        }
273        else {
274            /* replace entry */
275            tval = (*hep)->val;
276            (*hep)->val = val;
277        }
278        /* Return the object just removed from the cache to let the
279         * caller clean it up. Cast the constness away upon return.
280         */
281        return (void *) tval;
282    }
283    /* else key not present and val==NULL */
284    return NULL;
285}
286
287int cache_hash_count(cache_hash_t *ht)
288{
289    return ht->count;
290}
291