1#ifndef UBI_CACHE_H
2#define UBI_CACHE_H
3/* ========================================================================== **
4 *                                 ubi_Cache.h
5 *
6 *  Copyright (C) 1997 by Christopher R. Hertel
7 *
8 *  Email: crh@ubiqx.mn.org
9 * -------------------------------------------------------------------------- **
10 *
11 *  This module implements a generic cache.
12 *
13 * -------------------------------------------------------------------------- **
14 *
15 *  This library is free software; you can redistribute it and/or
16 *  modify it under the terms of the GNU Library General Public
17 *  License as published by the Free Software Foundation; either
18 *  version 2 of the License, or (at your option) any later version.
19 *
20 *  This library is distributed in the hope that it will be useful,
21 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23 *  Library General Public License for more details.
24 *
25 *  You should have received a copy of the GNU Library General Public
26 *  License along with this library; if not, write to the Free
27 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 *
29 * -------------------------------------------------------------------------- **
30 *
31 *  This module uses a splay tree to implement a simple cache.  The cache
32 *  module adds a thin layer of functionality to the splay tree.  In
33 *  particular:
34 *
35 *    - The tree (cache) may be limited in size by the number of
36 *      entries permitted or the amount of memory used.  When either
37 *      limit is exceeded cache entries are removed until the cache
38 *      conforms.
39 *    - Some statistical information is kept so that an approximate
40 *      "hit ratio" can be calculated.
41 *    - There are several functions available that provide access to
42 *      and management of cache size limits, hit ratio, and tree
43 *      trimming.
44 *
45 *  The splay tree is used because recently accessed items tend toward the
46 *  top of the tree and less recently accessed items tend toward the bottom.
47 *  This makes it easy to purge less recently used items should the cache
48 *  exceed its limits.
49 *
50 *  To use this module, you will need to supply a comparison function of
51 *  type ubi_trCompFunc and a node-freeing function of type
52 *  ubi_trKillNodeRtn.  See ubi_BinTree.h for more information on
53 *  these.  (This is all basic ubiqx tree management stuff.)
54 *
55 *  Notes:
56 *
57 *  - Cache performance will start to suffer dramatically if the
58 *    cache becomes large enough to force the OS to start swapping
59 *    memory to disk.  This is because the nodes of the underlying tree
60 *    will be scattered across memory in an order that is completely
61 *    unrelated to their traversal order.  As more and more of the
62 *    cache is placed into swap space, more and more swaps will be
63 *    required for a simple traversal (...and then there's the splay
64 *    operation).
65 *
66 *    In one simple test under Linux, the load and dump of a cache of
67 *    400,000 entries took only 1min, 40sec of real time.  The same
68 *    test with 450,000 records took 2 *hours* and eight minutes.
69 *
70 *  - In an effort to save memory, I considered using an unsigned
71 *    short to save the per-entry entry size.  I would have tucked this
72 *    value into some unused space in the tree node structure.  On
73 *    32-bit word aligned systems this would have saved an additional
74 *    four bytes per entry.  I may revisit this issue, but for now I've
75 *    decided against it.
76 *
77 *    Using an unsigned short would limit the size of an entry to 64K
78 *    bytes.  That's probably more than enough for most applications.
79 *    The key word in that last sentence, however, is "probably".  I
80 *    really dislike imposing such limits on things.
81 *
82 *  - Each entry keeps track of the amount of memory it used and the
83 *    cache header keeps the total.  This information is provided via
84 *    the EntrySize parameter in ubi_cachePut(), so it is up to you to
85 *    make sure that the numbers are accurate.  (The numbers don't even
86 *    have to represent bytes used.)
87 *
88 *    As you consider this, note that the strdup() function--as an
89 *    example--will call malloc().  The latter generally allocates a
90 *    multiple of the system word size, which may be more than the
91 *    number of bytes needed to store the string.
92 *
93 * -------------------------------------------------------------------------- **
94 *
95 *  Log: ubi_Cache.h,v
96 *  Revision 0.4  1999/09/22 03:42:24  crh
97 *  Fixed a minor typo.
98 *
99 *  Revision 0.3  1998/06/03 18:00:15  crh
100 *  Further fiddling with sys_include.h, which is no longer explicitly
101 *  included by this module since it is inherited from ubi_BinTree.h.
102 *
103 *  Revision 0.2  1998/06/02 01:36:18  crh
104 *  Changed include name from ubi_null.h to sys_include.h to make it
105 *  more generic.
106 *
107 *  Revision 0.1  1998/05/20 04:36:02  crh
108 *  The C file now includes ubi_null.h.  See ubi_null.h for more info.
109 *
110 *  Revision 0.0  1997/12/18 06:25:23  crh
111 *  Initial Revision.
112 *
113 * ========================================================================== **
114 */
115
116#include "ubi_SplayTree.h"
117
118/* -------------------------------------------------------------------------- **
119 * Typedefs...
120 *
121 *  ubi_cacheRoot     - Cache header structure, which consists of a binary
122 *                      tree root and other required housekeeping fields, as
123 *                      listed below.
124 *  ubi_cacheRootPtr  - Pointer to a Cache.
125 *
126 *  ubi_cacheEntry    - A cache Entry, which consists of a tree node
127 *                      structure and the size (in bytes) of the entry
128 *                      data.  The entry size should be supplied via
129 *                      the EntrySize parameter of the ubi_cachePut()
130 *                      function.
131 *
132 *  ubi_cacheEntryPtr - Pointer to a ubi_cacheEntry.
133 *
134 */
135
136typedef struct
137  {
138  ubi_trRoot        root;         /* Splay tree control structure.      */
139  ubi_trKillNodeRtn free_func;    /* Function used to free entries.     */
140  unsigned long     max_entries;  /* Max cache entries.  0 == unlimited */
141  unsigned long     max_memory;   /* Max memory to use.  0 == unlimited */
142  unsigned long     mem_used;     /* Memory currently in use (bytes).   */
143  unsigned short    cache_hits;   /* Incremented on succesful find.     */
144  unsigned short    cache_trys;   /* Incremented on cache lookup.       */
145  } ubi_cacheRoot;
146
147typedef ubi_cacheRoot *ubi_cacheRootPtr;
148
149
150typedef struct
151  {
152  ubi_trNode    node;           /* Tree node structure. */
153  unsigned long entry_size;     /* Entry size.  Used when managing
154                                 * caches with maximum memory limits.
155                                 */
156  } ubi_cacheEntry;
157
158typedef ubi_cacheEntry *ubi_cacheEntryPtr;
159
160
161/* -------------------------------------------------------------------------- **
162 * Macros...
163 *
164 *  ubi_cacheGetMaxEntries()  - Report the current maximum number of entries
165 *                              allowed in the cache.  Zero indicates no
166 *                              maximum.
167 *  ubi_cacheGetMaxMemory()   - Report the current maximum amount of memory
168 *                              that may be used in the cache.  Zero
169 *                              indicates no maximum.
170 *  ubi_cacheGetEntryCount()  - Report the current number of entries in the
171 *                              cache.
172 *  ubi_cacheGetMemUsed()     - Report the amount of memory currently in use
173 *                              by the cache.
174 */
175
176#define ubi_cacheGetMaxEntries( Cptr ) (((ubi_cacheRootPtr)(Cptr))->max_entries)
177#define ubi_cacheGetMaxMemory(  Cptr ) (((ubi_cacheRootPtr)(Cptr))->max_memory)
178
179#define ubi_cacheGetEntryCount( Cptr ) (((ubi_cacheRootPtr)(Cptr))->root.count)
180#define ubi_cacheGetMemUsed( Cptr ) (((ubi_cacheRootPtr)(Cptr))->mem_used)
181
182/* -------------------------------------------------------------------------- **
183 * Prototypes...
184 */
185
186ubi_cacheRootPtr ubi_cacheInit( ubi_cacheRootPtr  CachePtr,
187                                ubi_trCompFunc    CompFunc,
188                                ubi_trKillNodeRtn FreeFunc,
189                                unsigned long     MaxEntries,
190                                unsigned long     MaxMemory );
191  /* ------------------------------------------------------------------------ **
192   * Initialize a cache header structure.
193   *
194   *  Input:  CachePtr    - A pointer to a ubi_cacheRoot structure that is
195   *                        to be initialized.
196   *          CompFunc    - A pointer to the function that will be called
197   *                        to compare two cache values.  See the module
198   *                        comments, above, for more information.
199   *          FreeFunc    - A pointer to a function that will be called
200   *                        to free a cache entry.  If you allocated
201   *                        the cache entry using malloc(), then this
202   *                        will likely be free().  If you are allocating
203   *                        cache entries from a free list, then this will
204   *                        likely be a function that returns memory to the
205   *                        free list, etc.
206   *          MaxEntries  - The maximum number of entries that will be
207   *                        allowed to exist in the cache.  If this limit
208   *                        is exceeded, then existing entries will be
209   *                        removed from the cache.  A value of zero
210   *                        indicates that there is no limit on the number
211   *                        of cache entries.  See ubi_cachePut().
212   *          MaxMemory   - The maximum amount of memory, in bytes, to be
213   *                        allocated to the cache (excluding the cache
214   *                        header).  If this is exceeded, existing entries
215   *                        in the cache will be removed until enough memory
216   *                        has been freed to meet the condition.  See
217   *                        ubi_cachePut().
218   *
219   *  Output: A pointer to the initialized cache (i.e., the same as CachePtr).
220   *
221   *  Notes:  Both MaxEntries and MaxMemory may be changed after the cache
222   *          has been created.  See
223   *            ubi_cacheSetMaxEntries()
224   *            ubi_cacheSetMaxMemory()
225   *            ubi_cacheGetMaxEntries()
226   *            ubi_cacheGetMaxMemory()    (the latter two are macros).
227   *
228   *        - Memory is allocated in multiples of the word size.  The
229   *          return value of the strlen() function does not reflect
230   *          this; it will allways be less than or equal to the amount
231   *          of memory actually allocated.  Keep this in mind when
232   *          choosing a value for MaxMemory.
233   *
234   * ------------------------------------------------------------------------ **
235   */
236
237ubi_cacheRootPtr ubi_cacheClear( ubi_cacheRootPtr CachePtr );
238  /* ------------------------------------------------------------------------ **
239   * Remove and free all entries in an existing cache.
240   *
241   *  Input:  CachePtr  - A pointer to the cache that is to be cleared.
242   *
243   *  Output: A pointer to the cache header (i.e., the same as CachePtr).
244   *          This function re-initializes the cache header.
245   *
246   * ------------------------------------------------------------------------ **
247   */
248
249void ubi_cachePut( ubi_cacheRootPtr  CachePtr,
250                   unsigned long     EntrySize,
251                   ubi_cacheEntryPtr EntryPtr,
252                   ubi_trItemPtr     Key );
253  /* ------------------------------------------------------------------------ **
254   * Add an entry to the cache.
255   *
256   *  Input:  CachePtr  - A pointer to the cache into which the entry
257   *                      will be added.
258   *          EntrySize - The size, in bytes, of the memory block indicated
259   *                      by EntryPtr.  This will be copied into the
260   *                      EntryPtr->entry_size field.
261   *          EntryPtr  - A pointer to a memory block that begins with a
262   *                      ubi_cacheEntry structure.  The entry structure
263   *                      should be followed immediately by the data to be
264   *                      cached (even if that is a pointer to yet more data).
265   *          Key       - Pointer used to identify the lookup key within the
266   *                      Entry.
267   *
268   *  Output: None.
269   *
270   *  Notes:  After adding the new node, the cache is "trimmed".  This
271   *          removes extra nodes if the tree has exceeded it's memory or
272   *          entry count limits.  It is unlikely that the newly added node
273   *          will be purged from the cache (assuming a reasonably large
274   *          cache), since new nodes in a splay tree (which is what this
275   *          module was designed to use) are moved to the top of the tree
276   *          and the cache purge process removes nodes from the bottom of
277   *          the tree.
278   *        - The underlying splay tree is opened in OVERWRITE mode.  If
279   *          the input key matches an existing key, the existing entry will
280   *          be politely removed from the tree and freed.
281   *        - Memory is allocated in multiples of the word size.  The
282   *          return value of the strlen() function does not reflect
283   *          this; it will allways be less than or equal to the amount
284   *          of memory actually allocated.
285   *
286   * ------------------------------------------------------------------------ **
287   */
288
289ubi_cacheEntryPtr ubi_cacheGet( ubi_cacheRootPtr CachePtr,
290                                ubi_trItemPtr    FindMe );
291  /* ------------------------------------------------------------------------ **
292   * Attempt to retrieve an entry from the cache.
293   *
294   *  Input:  CachePtr  - A ponter to the cache that is to be searched.
295   *          FindMe    - A ubi_trItemPtr that indicates the key for which
296   *                      to search.
297   *
298   *  Output: A pointer to the cache entry that was found, or NULL if no
299   *          matching entry was found.
300   *
301   *  Notes:  This function also updates the hit ratio counters.
302   *          The counters are unsigned short.  If the number of cache tries
303   *          reaches 32768, then both the number of tries and the number of
304   *          hits are divided by two.  This prevents the counters from
305   *          overflowing.  See the comments in ubi_cacheHitRatio() for
306   *          additional notes.
307   *
308   * ------------------------------------------------------------------------ **
309   */
310
311ubi_trBool ubi_cacheDelete( ubi_cacheRootPtr CachePtr, ubi_trItemPtr DeleteMe );
312  /* ------------------------------------------------------------------------ **
313   * Find and delete the specified cache entry.
314   *
315   *  Input:  CachePtr  - A pointer to the cache.
316   *          DeleteMe  - The key of the entry to be deleted.
317   *
318   *  Output: TRUE if the entry was found & freed, else FALSE.
319   *
320   * ------------------------------------------------------------------------ **
321   */
322
323ubi_trBool ubi_cacheReduce( ubi_cacheRootPtr CachePtr, unsigned long count );
324  /* ------------------------------------------------------------------------ **
325   * Remove <count> entries from the bottom of the cache.
326   *
327   *  Input:  CachePtr  - A pointer to the cache which is to be reduced in
328   *                      size.
329   *          count     - The number of entries to remove.
330   *
331   *  Output: The function will return TRUE if <count> entries were removed,
332   *          else FALSE.  A return value of FALSE should indicate that
333   *          there were less than <count> entries in the cache, and that the
334   *          cache is now empty.
335   *
336   *  Notes:  This function forces a reduction in the number of cache entries
337   *          without requiring that the MaxMemory or MaxEntries values be
338   *          changed.
339   *
340   * ------------------------------------------------------------------------ **
341   */
342
343unsigned long ubi_cacheSetMaxEntries( ubi_cacheRootPtr CachePtr,
344                                      unsigned long    NewSize );
345  /* ------------------------------------------------------------------------ **
346   * Change the maximum number of entries allowed to exist in the cache.
347   *
348   *  Input:  CachePtr  - A pointer to the cache to be modified.
349   *          NewSize   - The new maximum number of cache entries.
350   *
351   *  Output: The maximum number of entries previously allowed to exist in
352   *          the cache.
353   *
354   *  Notes:  If the new size is less than the old size, this function will
355   *          trim the cache (remove excess entries).
356   *        - A value of zero indicates an unlimited number of entries.
357   *
358   * ------------------------------------------------------------------------ **
359   */
360
361unsigned long ubi_cacheSetMaxMemory( ubi_cacheRootPtr CachePtr,
362                                     unsigned long    NewSize );
363  /* ------------------------------------------------------------------------ **
364   * Change the maximum amount of memory to be used for storing cache
365   * entries.
366   *
367   *  Input:  CachePtr  - A pointer to the cache to be modified.
368   *          NewSize   - The new cache memory size.
369   *
370   *  Output: The previous maximum memory size.
371   *
372   *  Notes:  If the new size is less than the old size, this function will
373   *          trim the cache (remove excess entries).
374   *        - A value of zero indicates that the cache has no memory limit.
375   *
376   * ------------------------------------------------------------------------ **
377   */
378
379int ubi_cacheHitRatio( ubi_cacheRootPtr CachePtr );
380  /* ------------------------------------------------------------------------ **
381   * Returns a value that is 10,000 times the slightly weighted average hit
382   * ratio for the cache.
383   *
384   *  Input:  CachePtr  - Pointer to the cache to be queried.
385   *
386   *  Output: An integer that is 10,000 times the number of successful
387   *          cache hits divided by the number of cache lookups, or:
388   *            (10000 * hits) / trys
389   *          You can easily convert this to a float, or do something
390   *          like this (where i is the return value of this function):
391   *
392   *            printf( "Hit rate   : %d.%02d%%\n", (i/100), (i%100) );
393   *
394   *  Notes:  I say "slightly-weighted", because the numerator and
395   *          denominator are both accumulated in locations of type
396   *          'unsigned short'.  If the number of cache trys becomes
397   *          large enough, both are divided  by two.  (See function
398   *          ubi_cacheGet().)
399   *          Dividing both numerator and denominator by two does not
400   *          change the ratio (much...it is an integer divide), but it
401   *          does mean that subsequent increments to either counter will
402   *          have twice as much significance as previous ones.
403   *
404   *        - The value returned by this function will be in the range
405   *          [0..10000] because ( 0 <= cache_hits <= cache_trys ) will
406   *          always be true.
407   *
408   * ------------------------------------------------------------------------ **
409   */
410
411/* -------------------------------------------------------------------------- */
412#endif /* ubi_CACHE_H */
413