159191Skris=pod
259191Skris
359191Skris=head1 NAME
459191Skris
5109998Smarkmlh_new, lh_free, lh_insert, lh_delete, lh_retrieve, lh_doall, lh_doall_arg, lh_error - dynamic hash table
659191Skris
759191Skris=head1 SYNOPSIS
859191Skris
959191Skris #include <openssl/lhash.h>
1059191Skris
11238405Sjkim DECLARE_LHASH_OF(<type>);
1259191Skris
13238405Sjkim LHASH *lh_<type>_new();
14238405Sjkim void lh_<type>_free(LHASH_OF(<type> *table);
1559191Skris
16238405Sjkim <type> *lh_<type>_insert(LHASH_OF(<type> *table, <type> *data);
17238405Sjkim <type> *lh_<type>_delete(LHASH_OF(<type> *table, <type> *data);
18238405Sjkim <type> *lh_retrieve(LHASH_OF<type> *table, <type> *data);
1959191Skris
20238405Sjkim void lh_<type>_doall(LHASH_OF(<type> *table, LHASH_DOALL_FN_TYPE func);
21238405Sjkim void lh_<type>_doall_arg(LHASH_OF(<type> *table, LHASH_DOALL_ARG_FN_TYPE func,
22238405Sjkim          <type2>, <type2> *arg);
2359191Skris
24238405Sjkim int lh_<type>_error(LHASH_OF(<type> *table);
25238405Sjkim
26109998Smarkm typedef int (*LHASH_COMP_FN_TYPE)(const void *, const void *);
27109998Smarkm typedef unsigned long (*LHASH_HASH_FN_TYPE)(const void *);
28109998Smarkm typedef void (*LHASH_DOALL_FN_TYPE)(const void *);
29109998Smarkm typedef void (*LHASH_DOALL_ARG_FN_TYPE)(const void *, const void *);
30109998Smarkm
3159191Skris=head1 DESCRIPTION
3259191Skris
33238405SjkimThis library implements type-checked dynamic hash tables. The hash
34238405Sjkimtable entries can be arbitrary structures. Usually they consist of key
35238405Sjkimand value fields.
3659191Skris
37238405Sjkimlh_<type>_new() creates a new B<LHASH_OF(<type>> structure to store
38238405Sjkimarbitrary data entries, and provides the 'hash' and 'compare'
39238405Sjkimcallbacks to be used in organising the table's entries.  The B<hash>
40238405Sjkimcallback takes a pointer to a table entry as its argument and returns
41238405Sjkiman unsigned long hash value for its key field.  The hash value is
42238405Sjkimnormally truncated to a power of 2, so make sure that your hash
43238405Sjkimfunction returns well mixed low order bits.  The B<compare> callback
44238405Sjkimtakes two arguments (pointers to two hash table entries), and returns
45238405Sjkim0 if their keys are equal, non-zero otherwise.  If your hash table
46238405Sjkimwill contain items of some particular type and the B<hash> and
47238405SjkimB<compare> callbacks hash/compare these types, then the
48238405SjkimB<DECLARE_LHASH_HASH_FN> and B<IMPLEMENT_LHASH_COMP_FN> macros can be
49238405Sjkimused to create callback wrappers of the prototypes required by
50238405Sjkimlh_<type>_new().  These provide per-variable casts before calling the
51238405Sjkimtype-specific callbacks written by the application author.  These
52238405Sjkimmacros, as well as those used for the "doall" callbacks, are defined
53238405Sjkimas;
5459191Skris
55238405Sjkim #define DECLARE_LHASH_HASH_FN(name, o_type) \
56238405Sjkim	 unsigned long name##_LHASH_HASH(const void *);
57238405Sjkim #define IMPLEMENT_LHASH_HASH_FN(name, o_type) \
58238405Sjkim	 unsigned long name##_LHASH_HASH(const void *arg) { \
59238405Sjkim		 const o_type *a = arg; \
60238405Sjkim		 return name##_hash(a); }
61238405Sjkim #define LHASH_HASH_FN(name) name##_LHASH_HASH
62109998Smarkm
63238405Sjkim #define DECLARE_LHASH_COMP_FN(name, o_type) \
64238405Sjkim	 int name##_LHASH_COMP(const void *, const void *);
65238405Sjkim #define IMPLEMENT_LHASH_COMP_FN(name, o_type) \
66238405Sjkim	 int name##_LHASH_COMP(const void *arg1, const void *arg2) { \
67238405Sjkim		 const o_type *a = arg1;		    \
68238405Sjkim		 const o_type *b = arg2; \
69238405Sjkim		 return name##_cmp(a,b); }
70238405Sjkim #define LHASH_COMP_FN(name) name##_LHASH_COMP
71109998Smarkm
72238405Sjkim #define DECLARE_LHASH_DOALL_FN(name, o_type) \
73238405Sjkim	 void name##_LHASH_DOALL(void *);
74238405Sjkim #define IMPLEMENT_LHASH_DOALL_FN(name, o_type) \
75238405Sjkim	 void name##_LHASH_DOALL(void *arg) { \
76238405Sjkim		 o_type *a = arg; \
77238405Sjkim		 name##_doall(a); }
78238405Sjkim #define LHASH_DOALL_FN(name) name##_LHASH_DOALL
79109998Smarkm
80238405Sjkim #define DECLARE_LHASH_DOALL_ARG_FN(name, o_type, a_type) \
81238405Sjkim	 void name##_LHASH_DOALL_ARG(void *, void *);
82238405Sjkim #define IMPLEMENT_LHASH_DOALL_ARG_FN(name, o_type, a_type) \
83238405Sjkim	 void name##_LHASH_DOALL_ARG(void *arg1, void *arg2) { \
84238405Sjkim		 o_type *a = arg1; \
85238405Sjkim		 a_type *b = arg2; \
86238405Sjkim		 name##_doall_arg(a, b); }
87238405Sjkim #define LHASH_DOALL_ARG_FN(name) name##_LHASH_DOALL_ARG
88109998Smarkm
89238405Sjkim An example of a hash table storing (pointers to) structures of type 'STUFF'
90238405Sjkim could be defined as follows;
91109998Smarkm
92109998Smarkm /* Calculates the hash value of 'tohash' (implemented elsewhere) */
93109998Smarkm unsigned long STUFF_hash(const STUFF *tohash);
94109998Smarkm /* Orders 'arg1' and 'arg2' (implemented elsewhere) */
95238405Sjkim int stuff_cmp(const STUFF *arg1, const STUFF *arg2);
96109998Smarkm /* Create the type-safe wrapper functions for use in the LHASH internals */
97238405Sjkim static IMPLEMENT_LHASH_HASH_FN(stuff, STUFF);
98238405Sjkim static IMPLEMENT_LHASH_COMP_FN(stuff, STUFF);
99109998Smarkm /* ... */
100109998Smarkm int main(int argc, char *argv[]) {
101109998Smarkm         /* Create the new hash table using the hash/compare wrappers */
102238405Sjkim         LHASH_OF(STUFF) *hashtable = lh_STUFF_new(LHASH_HASH_FN(STUFF_hash),
103109998Smarkm                                   LHASH_COMP_FN(STUFF_cmp));
104109998Smarkm	 /* ... */
105109998Smarkm }
106109998Smarkm
107238405Sjkimlh_<type>_free() frees the B<LHASH_OF(<type>> structure
108238405SjkimB<table>. Allocated hash table entries will not be freed; consider
109238405Sjkimusing lh_<type>_doall() to deallocate any remaining entries in the
110238405Sjkimhash table (see below).
11159191Skris
112238405Sjkimlh_<type>_insert() inserts the structure pointed to by B<data> into
113238405SjkimB<table>.  If there already is an entry with the same key, the old
114238405Sjkimvalue is replaced. Note that lh_<type>_insert() stores pointers, the
115238405Sjkimdata are not copied.
11659191Skris
117238405Sjkimlh_<type>_delete() deletes an entry from B<table>.
11859191Skris
119238405Sjkimlh_<type>_retrieve() looks up an entry in B<table>. Normally, B<data>
120238405Sjkimis a structure with the key field(s) set; the function will return a
12159191Skrispointer to a fully populated structure.
12259191Skris
123238405Sjkimlh_<type>_doall() will, for every entry in the hash table, call
124238405SjkimB<func> with the data item as its parameter.  For lh_<type>_doall()
125238405Sjkimand lh_<type>_doall_arg(), function pointer casting should be avoided
126238405Sjkimin the callbacks (see B<NOTE>) - instead use the declare/implement
127238405Sjkimmacros to create type-checked wrappers that cast variables prior to
128238405Sjkimcalling your type-specific callbacks.  An example of this is
129238405Sjkimillustrated here where the callback is used to cleanup resources for
130238405Sjkimitems in the hash table prior to the hashtable itself being
131238405Sjkimdeallocated:
13259191Skris
133109998Smarkm /* Cleans up resources belonging to 'a' (this is implemented elsewhere) */
134238405Sjkim void STUFF_cleanup_doall(STUFF *a);
135109998Smarkm /* Implement a prototype-compatible wrapper for "STUFF_cleanup" */
136238405Sjkim IMPLEMENT_LHASH_DOALL_FN(STUFF_cleanup, STUFF)
137109998Smarkm         /* ... then later in the code ... */
138109998Smarkm /* So to run "STUFF_cleanup" against all items in a hash table ... */
139238405Sjkim lh_STUFF_doall(hashtable, LHASH_DOALL_FN(STUFF_cleanup));
140109998Smarkm /* Then the hash table itself can be deallocated */
141238405Sjkim lh_STUFF_free(hashtable);
14259191Skris
143109998SmarkmWhen doing this, be careful if you delete entries from the hash table
144109998Smarkmin your callbacks: the table may decrease in size, moving the item
145109998Smarkmthat you are currently on down lower in the hash table - this could
146109998Smarkmcause some entries to be skipped during the iteration.  The second
147109998Smarkmbest solution to this problem is to set hash-E<gt>down_load=0 before
148109998Smarkmyou start (which will stop the hash table ever decreasing in size).
149109998SmarkmThe best solution is probably to avoid deleting items from the hash
150109998Smarkmtable inside a "doall" callback!
151109998Smarkm
152238405Sjkimlh_<type>_doall_arg() is the same as lh_<type>_doall() except that
153238405SjkimB<func> will be called with B<arg> as the second argument and B<func>
154238405Sjkimshould be of type B<LHASH_DOALL_ARG_FN_TYPE> (a callback prototype
155238405Sjkimthat is passed both the table entry and an extra argument).  As with
156238405Sjkimlh_doall(), you can instead choose to declare your callback with a
157238405Sjkimprototype matching the types you are dealing with and use the
158238405Sjkimdeclare/implement macros to create compatible wrappers that cast
159238405Sjkimvariables before calling your type-specific callbacks.  An example of
160238405Sjkimthis is demonstrated here (printing all hash table entries to a BIO
161238405Sjkimthat is provided by the caller):
162109998Smarkm
163109998Smarkm /* Prints item 'a' to 'output_bio' (this is implemented elsewhere) */
164238405Sjkim void STUFF_print_doall_arg(const STUFF *a, BIO *output_bio);
165109998Smarkm /* Implement a prototype-compatible wrapper for "STUFF_print" */
166238405Sjkim static IMPLEMENT_LHASH_DOALL_ARG_FN(STUFF, const STUFF, BIO)
167109998Smarkm         /* ... then later in the code ... */
168109998Smarkm /* Print out the entire hashtable to a particular BIO */
169238405Sjkim lh_STUFF_doall_arg(hashtable, LHASH_DOALL_ARG_FN(STUFF_print), BIO,
170238405Sjkim                    logging_bio);
171109998Smarkm 
172238405Sjkimlh_<type>_error() can be used to determine if an error occurred in the last
173238405Sjkimoperation. lh_<type>_error() is a macro.
17459191Skris
17559191Skris=head1 RETURN VALUES
17659191Skris
177238405Sjkimlh_<type>_new() returns B<NULL> on error, otherwise a pointer to the new
17859191SkrisB<LHASH> structure.
17959191Skris
180238405SjkimWhen a hash table entry is replaced, lh_<type>_insert() returns the value
18159191Skrisbeing replaced. B<NULL> is returned on normal operation and on error.
18259191Skris
183238405Sjkimlh_<type>_delete() returns the entry being deleted.  B<NULL> is returned if
18459191Skristhere is no such value in the hash table.
18559191Skris
186238405Sjkimlh_<type>_retrieve() returns the hash table entry if it has been found,
18759191SkrisB<NULL> otherwise.
18859191Skris
189238405Sjkimlh_<type>_error() returns 1 if an error occurred in the last operation, 0
19059191Skrisotherwise.
19159191Skris
192238405Sjkimlh_<type>_free(), lh_<type>_doall() and lh_<type>_doall_arg() return no values.
19359191Skris
194109998Smarkm=head1 NOTE
195109998Smarkm
196109998SmarkmThe various LHASH macros and callback types exist to make it possible
197238405Sjkimto write type-checked code without resorting to function-prototype
198109998Smarkmcasting - an evil that makes application code much harder to
199109998Smarkmaudit/verify and also opens the window of opportunity for stack
200109998Smarkmcorruption and other hard-to-find bugs.  It also, apparently, violates
201109998SmarkmANSI-C.
202109998Smarkm
203109998SmarkmThe LHASH code regards table entries as constant data.  As such, it
204109998Smarkminternally represents lh_insert()'d items with a "const void *"
205109998Smarkmpointer type.  This is why callbacks such as those used by lh_doall()
206109998Smarkmand lh_doall_arg() declare their prototypes with "const", even for the
207109998Smarkmparameters that pass back the table items' data pointers - for
208109998Smarkmconsistency, user-provided data is "const" at all times as far as the
209109998SmarkmLHASH code is concerned.  However, as callers are themselves providing
210109998Smarkmthese pointers, they can choose whether they too should be treating
211109998Smarkmall such parameters as constant.
212109998Smarkm
213109998SmarkmAs an example, a hash table may be maintained by code that, for
214109998Smarkmreasons of encapsulation, has only "const" access to the data being
215109998Smarkmindexed in the hash table (ie. it is returned as "const" from
216109998Smarkmelsewhere in their code) - in this case the LHASH prototypes are
217109998Smarkmappropriate as-is.  Conversely, if the caller is responsible for the
218109998Smarkmlife-time of the data in question, then they may well wish to make
219109998Smarkmmodifications to table item passed back in the lh_doall() or
220109998Smarkmlh_doall_arg() callbacks (see the "STUFF_cleanup" example above).  If
221109998Smarkmso, the caller can either cast the "const" away (if they're providing
222109998Smarkmthe raw callbacks themselves) or use the macros to declare/implement
223109998Smarkmthe wrapper functions without "const" types.
224109998Smarkm
225109998SmarkmCallers that only have "const" access to data they're indexing in a
226109998Smarkmtable, yet declare callbacks without constant types (or cast the
227109998Smarkm"const" away themselves), are therefore creating their own risks/bugs
228109998Smarkmwithout being encouraged to do so by the API.  On a related note,
229109998Smarkmthose auditing code should pay special attention to any instances of
230109998SmarkmDECLARE/IMPLEMENT_LHASH_DOALL_[ARG_]_FN macros that provide types
231109998Smarkmwithout any "const" qualifiers.
232109998Smarkm
23359191Skris=head1 BUGS
23459191Skris
235238405Sjkimlh_<type>_insert() returns B<NULL> both for success and error.
23659191Skris
23759191Skris=head1 INTERNALS
23859191Skris
23959191SkrisThe following description is based on the SSLeay documentation:
24059191Skris
24159191SkrisThe B<lhash> library implements a hash table described in the
24259191SkrisI<Communications of the ACM> in 1991.  What makes this hash table
24359191Skrisdifferent is that as the table fills, the hash table is increased (or
24468651Skrisdecreased) in size via OPENSSL_realloc().  When a 'resize' is done, instead of
24559191Skrisall hashes being redistributed over twice as many 'buckets', one
24659191Skrisbucket is split.  So when an 'expand' is done, there is only a minimal
24759191Skriscost to redistribute some values.  Subsequent inserts will cause more
24859191Skrissingle 'bucket' redistributions but there will never be a sudden large
24959191Skriscost due to redistributing all the 'buckets'.
25059191Skris
25159191SkrisThe state for a particular hash table is kept in the B<LHASH> structure.
25259191SkrisThe decision to increase or decrease the hash table size is made
25359191Skrisdepending on the 'load' of the hash table.  The load is the number of
25459191Skrisitems in the hash table divided by the size of the hash table.  The
25559191Skrisdefault values are as follows.  If (hash->up_load E<lt> load) =E<gt>
25659191Skrisexpand.  if (hash-E<gt>down_load E<gt> load) =E<gt> contract.  The
25759191SkrisB<up_load> has a default value of 1 and B<down_load> has a default value
25859191Skrisof 2.  These numbers can be modified by the application by just
25959191Skrisplaying with the B<up_load> and B<down_load> variables.  The 'load' is
26059191Skriskept in a form which is multiplied by 256.  So
26159191Skrishash-E<gt>up_load=8*256; will cause a load of 8 to be set.
26259191Skris
26359191SkrisIf you are interested in performance the field to watch is
26459191Skrisnum_comp_calls.  The hash library keeps track of the 'hash' value for
26559191Skriseach item so when a lookup is done, the 'hashes' are compared, if
26659191Skristhere is a match, then a full compare is done, and
26759191Skrishash-E<gt>num_comp_calls is incremented.  If num_comp_calls is not equal
26859191Skristo num_delete plus num_retrieve it means that your hash function is
26959191Skrisgenerating hashes that are the same for different values.  It is
27059191Skrisprobably worth changing your hash function if this is the case because
27159191Skriseven if your hash table has 10 items in a 'bucket', it can be searched
27259191Skriswith 10 B<unsigned long> compares and 10 linked list traverses.  This
273109998Smarkmwill be much less expensive that 10 calls to your compare function.
27459191Skris
27559191Skrislh_strhash() is a demo string hashing function:
27659191Skris
27759191Skris unsigned long lh_strhash(const char *c);
27859191Skris
27959191SkrisSince the B<LHASH> routines would normally be passed structures, this
280238405Sjkimroutine would not normally be passed to lh_<type>_new(), rather it would be
281238405Sjkimused in the function passed to lh_<type>_new().
28259191Skris
28359191Skris=head1 SEE ALSO
28459191Skris
28559191SkrisL<lh_stats(3)|lh_stats(3)>
28659191Skris
28759191Skris=head1 HISTORY
28859191Skris
28959191SkrisThe B<lhash> library is available in all versions of SSLeay and OpenSSL.
29059191Skrislh_error() was added in SSLeay 0.9.1b.
29159191Skris
29259191SkrisThis manpage is derived from the SSLeay documentation.
29359191Skris
294109998SmarkmIn OpenSSL 0.9.7, all lhash functions that were passed function pointers
295109998Smarkmwere changed for better type safety, and the function types LHASH_COMP_FN_TYPE,
296109998SmarkmLHASH_HASH_FN_TYPE, LHASH_DOALL_FN_TYPE and LHASH_DOALL_ARG_FN_TYPE 
297109998Smarkmbecame available.
298109998Smarkm
299238405SjkimIn OpenSSL 1.0.0, the lhash interface was revamped for even better
300238405Sjkimtype checking.
301238405Sjkim
30259191Skris=cut
303