1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "apr_general.h" 18 19#include "mod_cache.h" 20#include "cache_hash.h" 21 22#if APR_HAVE_STDLIB_H 23#include <stdlib.h> 24#endif 25#if APR_HAVE_STRING_H 26#include <string.h> 27#endif 28 29 30/* 31 * The internal form of a hash table. 32 * 33 * The table is an array indexed by the hash of the key; collisions 34 * are resolved by hanging a linked list of hash entries off each 35 * element of the array. Although this is a really simple design it 36 * isn't too bad given that pools have a low allocation overhead. 37 */ 38 39typedef struct cache_hash_entry_t cache_hash_entry_t; 40 41struct cache_hash_entry_t { 42 cache_hash_entry_t *next; 43 unsigned int hash; 44 const void *key; 45 apr_ssize_t klen; 46 const void *val; 47}; 48 49/* 50 * Data structure for iterating through a hash table. 51 * 52 * We keep a pointer to the next hash entry here to allow the current 53 * hash entry to be freed or otherwise mangled between calls to 54 * cache_hash_next(). 55 */ 56struct cache_hash_index_t { 57 cache_hash_t *ht; 58 cache_hash_entry_t *this, *next; 59 int index; 60}; 61 62/* 63 * The size of the array is always a power of two. We use the maximum 64 * index rather than the size so that we can use bitwise-AND for 65 * modular arithmetic. 66 * The count of hash entries may be greater depending on the chosen 67 * collision rate. 68 */ 69struct cache_hash_t { 70 cache_hash_entry_t **array; 71 cache_hash_index_t iterator; /* For cache_hash_first(NULL, ...) */ 72 int count, max; 73}; 74 75/* 76 * Hash creation functions. 77 */ 78static cache_hash_entry_t **alloc_array(cache_hash_t *ht, int max) 79{ 80 return calloc(1, sizeof(*ht->array) * (max + 1)); 81} 82 83cache_hash_t* cache_hash_make(apr_size_t size) 84{ 85 cache_hash_t *ht; 86 ht = malloc(sizeof(cache_hash_t)); 87 if (!ht) { 88 return NULL; 89 } 90 ht->count = 0; 91 ht->max = size; 92 ht->array = alloc_array(ht, ht->max); 93 if (!ht->array) { 94 free(ht); 95 return NULL; 96 } 97 return ht; 98} 99 100void cache_hash_free(cache_hash_t *ht) 101{ 102 if (ht) { 103 if (ht->array) { 104 free (ht->array); 105 } 106 free (ht); 107 } 108} 109/* 110 * Hash iteration functions. 111 */ 112 113cache_hash_index_t* cache_hash_next(cache_hash_index_t *hi) 114{ 115 hi->this = hi->next; 116 while (!hi->this) { 117 if (hi->index > hi->ht->max) 118 return NULL; 119 hi->this = hi->ht->array[hi->index++]; 120 } 121 hi->next = hi->this->next; 122 return hi; 123} 124 125cache_hash_index_t* cache_hash_first(cache_hash_t *ht) 126{ 127 cache_hash_index_t *hi; 128 129 hi = &ht->iterator; 130 hi->ht = ht; 131 hi->index = 0; 132 hi->this = NULL; 133 hi->next = NULL; 134 return cache_hash_next(hi); 135} 136 137void cache_hash_this(cache_hash_index_t *hi, 138 const void **key, 139 apr_ssize_t *klen, 140 void **val) 141{ 142 if (key) *key = hi->this->key; 143 if (klen) *klen = hi->this->klen; 144 if (val) *val = (void *)hi->this->val; 145} 146 147 148/* 149 * This is where we keep the details of the hash function and control 150 * the maximum collision rate. 151 * 152 * If val is non-NULL it creates and initializes a new hash entry if 153 * there isn't already one there; it returns an updatable pointer so 154 * that hash entries can be removed. 155 */ 156 157static cache_hash_entry_t **find_entry(cache_hash_t *ht, 158 const void *key, 159 apr_ssize_t klen, 160 const void *val) 161{ 162 cache_hash_entry_t **hep, *he; 163 const unsigned char *p; 164 unsigned int hash; 165 apr_ssize_t i; 166 167 /* 168 * This is the popular `times 33' hash algorithm which is used by 169 * perl and also appears in Berkeley DB. This is one of the best 170 * known hash functions for strings because it is both computed 171 * very fast and distributes very well. 172 * 173 * The originator may be Dan Bernstein but the code in Berkeley DB 174 * cites Chris Torek as the source. The best citation I have found 175 * is "Chris Torek, Hash function for text in C, Usenet message 176 * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich 177 * Salz's USENIX 1992 paper about INN which can be found at 178 * <http://citeseer.nj.nec.com/salz92internetnews.html>. 179 * 180 * The magic of number 33, i.e. why it works better than many other 181 * constants, prime or not, has never been adequately explained by 182 * anyone. So I try an explanation: if one experimentally tests all 183 * multipliers between 1 and 256 (as I did while writing a low-level 184 * data structure library some time ago) one detects that even 185 * numbers are not useable at all. The remaining 128 odd numbers 186 * (except for the number 1) work more or less all equally well. 187 * They all distribute in an acceptable way and this way fill a hash 188 * table with an average percent of approx. 86%. 189 * 190 * If one compares the chi^2 values of the variants (see 191 * Bob Jenkins ``Hashing Frequently Asked Questions'' at 192 * http://burtleburtle.net/bob/hash/hashfaq.html for a description 193 * of chi^2), the number 33 not even has the best value. But the 194 * number 33 and a few other equally good numbers like 17, 31, 63, 195 * 127 and 129 have nevertheless a great advantage to the remaining 196 * numbers in the large set of possible multipliers: their multiply 197 * operation can be replaced by a faster operation based on just one 198 * shift plus either a single addition or subtraction operation. And 199 * because a hash function has to both distribute good _and_ has to 200 * be very fast to compute, those few numbers should be preferred. 201 * 202 * -- Ralf S. Engelschall <rse@engelschall.com> 203 */ 204 hash = 0; 205 if (klen == CACHE_HASH_KEY_STRING) { 206 for (p = key; *p; p++) { 207 hash = hash * 33 + *p; 208 } 209 klen = p - (const unsigned char *)key; 210 } 211 else { 212 for (p = key, i = klen; i; i--, p++) { 213 hash = hash * 33 + *p; 214 } 215 } 216 217 /* scan linked list */ 218 for (hep = &ht->array[hash % ht->max], he = *hep; 219 he; 220 hep = &he->next, he = *hep) { 221 if (he->hash == hash && 222 he->klen == klen && 223 memcmp(he->key, key, klen) == 0) 224 break; 225 } 226 if (he || !val) 227 return hep; 228 /* add a new entry for non-NULL values */ 229 he = malloc(sizeof(*he)); 230 if (!he) { 231 return NULL; 232 } 233 he->next = NULL; 234 he->hash = hash; 235 he->key = key; 236 he->klen = klen; 237 he->val = val; 238 *hep = he; 239 ht->count++; 240 return hep; 241} 242 243void* cache_hash_get(cache_hash_t *ht, 244 const void *key, 245 apr_ssize_t klen) 246{ 247 cache_hash_entry_t *he; 248 he = *find_entry(ht, key, klen, NULL); 249 if (he) 250 return (void *)he->val; 251 else 252 return NULL; 253} 254 255void* cache_hash_set(cache_hash_t *ht, 256 const void *key, 257 apr_ssize_t klen, 258 const void *val) 259{ 260 cache_hash_entry_t **hep, *tmp; 261 const void *tval; 262 hep = find_entry(ht, key, klen, val); 263 /* If hep == NULL, then the malloc() in find_entry failed */ 264 if (hep && *hep) { 265 if (!val) { 266 /* delete entry */ 267 tval = (*hep)->val; 268 tmp = *hep; 269 *hep = (*hep)->next; 270 free(tmp); 271 --ht->count; 272 } 273 else { 274 /* replace entry */ 275 tval = (*hep)->val; 276 (*hep)->val = val; 277 } 278 /* Return the object just removed from the cache to let the 279 * caller clean it up. Cast the constness away upon return. 280 */ 281 return (void *) tval; 282 } 283 /* else key not present and val==NULL */ 284 return NULL; 285} 286 287int cache_hash_count(cache_hash_t *ht) 288{ 289 return ht->count; 290} 291