1228060Sbapt/* This may look like C code, but it is really -*- C++ -*- */ 2228060Sbapt 3228060Sbapt/* Search algorithm. 4228060Sbapt 5228060Sbapt Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. 6228060Sbapt Written by Douglas C. Schmidt <schmidt@ics.uci.edu> 7228060Sbapt and Bruno Haible <bruno@clisp.org>. 8228060Sbapt 9228060Sbapt This file is part of GNU GPERF. 10228060Sbapt 11228060Sbapt GNU GPERF is free software; you can redistribute it and/or modify 12228060Sbapt it under the terms of the GNU General Public License as published by 13228060Sbapt the Free Software Foundation; either version 2, or (at your option) 14228060Sbapt any later version. 15228060Sbapt 16228060Sbapt GNU GPERF is distributed in the hope that it will be useful, 17228060Sbapt but WITHOUT ANY WARRANTY; without even the implied warranty of 18228060Sbapt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19228060Sbapt GNU General Public License for more details. 20228060Sbapt 21228060Sbapt You should have received a copy of the GNU General Public License 22228060Sbapt along with this program; see the file COPYING. 23228060Sbapt If not, write to the Free Software Foundation, Inc., 24228060Sbapt 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 25228060Sbapt 26228060Sbapt#ifndef search_h 27228060Sbapt#define search_h 1 28228060Sbapt 29228060Sbapt#include "keyword-list.h" 30228060Sbapt#include "positions.h" 31228060Sbapt#include "bool-array.h" 32228060Sbapt 33228060Sbaptstruct EquivalenceClass; 34228060Sbapt 35228060Sbaptclass Search 36228060Sbapt{ 37228060Sbaptpublic: 38228060Sbapt Search (KeywordExt_List *list); 39228060Sbapt ~Search (); 40228060Sbapt void optimize (); 41228060Sbaptprivate: 42228060Sbapt void prepare (); 43228060Sbapt 44228060Sbapt /* Computes the upper bound on the indices passed to asso_values[], 45228060Sbapt assuming no alpha_increments. */ 46228060Sbapt unsigned int compute_alpha_size () const; 47228060Sbapt 48228060Sbapt /* Computes the unification rules between different asso_values[c], 49228060Sbapt assuming no alpha_increments. */ 50228060Sbapt unsigned int * compute_alpha_unify () const; 51228060Sbapt 52228060Sbapt /* Initializes each keyword's _selchars array. */ 53228060Sbapt void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const; 54228060Sbapt /* Deletes each keyword's _selchars array. */ 55228060Sbapt void delete_selchars () const; 56228060Sbapt 57228060Sbapt /* Count the duplicate keywords that occur with a given set of positions. */ 58228060Sbapt unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const; 59228060Sbapt 60228060Sbapt /* Find good key positions. */ 61228060Sbapt void find_positions (); 62228060Sbapt 63228060Sbapt /* Count the duplicate keywords that occur with the found set of positions. */ 64228060Sbapt unsigned int count_duplicates_tuple () const; 65228060Sbapt 66228060Sbapt /* Computes the upper bound on the indices passed to asso_values[]. */ 67228060Sbapt unsigned int compute_alpha_size (const unsigned int *alpha_inc) const; 68228060Sbapt 69228060Sbapt /* Computes the unification rules between different asso_values[c]. */ 70228060Sbapt unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const; 71228060Sbapt 72228060Sbapt /* Initializes each keyword's _selchars array. */ 73228060Sbapt void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const; 74228060Sbapt 75228060Sbapt /* Count the duplicate keywords that occur with the given set of positions 76228060Sbapt and a given alpha_inc[] array. */ 77228060Sbapt unsigned int count_duplicates_multiset (const unsigned int *alpha_inc) const; 78228060Sbapt 79228060Sbapt /* Find good _alpha_inc[]. */ 80228060Sbapt void find_alpha_inc (); 81228060Sbapt 82228060Sbapt /* Initializes the asso_values[] related parameters. */ 83228060Sbapt void prepare_asso_values (); 84228060Sbapt 85228060Sbapt EquivalenceClass * compute_partition (bool *undetermined) const; 86228060Sbapt 87228060Sbapt unsigned int count_possible_collisions (EquivalenceClass *partition, unsigned int c) const; 88228060Sbapt 89228060Sbapt bool unchanged_partition (EquivalenceClass *partition, unsigned int c) const; 90228060Sbapt 91228060Sbapt /* Finds some _asso_values[] that fit. */ 92228060Sbapt void find_asso_values (); 93228060Sbapt 94228060Sbapt /* Computes a keyword's hash value, relative to the current _asso_values[], 95228060Sbapt and stores it in keyword->_hash_value. */ 96228060Sbapt int compute_hash (KeywordExt *keyword) const; 97228060Sbapt 98228060Sbapt /* Finds good _asso_values[]. */ 99228060Sbapt void find_good_asso_values (); 100228060Sbapt 101228060Sbapt /* Sorts the keyword list by hash value. */ 102228060Sbapt void sort (); 103228060Sbapt 104228060Sbaptpublic: 105228060Sbapt 106228060Sbapt /* Linked list of keywords. */ 107228060Sbapt KeywordExt_List * _head; 108228060Sbapt 109228060Sbapt /* Total number of keywords, counting duplicates. */ 110228060Sbapt int _total_keys; 111228060Sbapt 112228060Sbapt /* Maximum length of the longest keyword. */ 113228060Sbapt int _max_key_len; 114228060Sbapt 115228060Sbapt /* Minimum length of the shortest keyword. */ 116228060Sbapt int _min_key_len; 117228060Sbapt 118228060Sbapt /* User-specified or computed key positions. */ 119228060Sbapt Positions _key_positions; 120228060Sbapt 121228060Sbapt /* Adjustments to add to bytes add specific key positions. */ 122228060Sbapt unsigned int * _alpha_inc; 123228060Sbapt 124228060Sbapt /* Size of alphabet. */ 125228060Sbapt unsigned int _alpha_size; 126228060Sbapt 127228060Sbapt /* Alphabet character unification, either the identity or a mapping from 128228060Sbapt upper case characters to lower case characters (and maybe more). */ 129228060Sbapt unsigned int * _alpha_unify; 130228060Sbapt 131228060Sbapt /* Maximum _selchars_length over all keywords. */ 132228060Sbapt unsigned int _max_selchars_length; 133228060Sbapt 134228060Sbapt /* Total number of duplicates that have been moved to _duplicate_link lists 135228060Sbapt (not counting their representatives which stay on the main list). */ 136228060Sbapt int _total_duplicates; 137228060Sbapt 138228060Sbapt /* Counts occurrences of each key set character. 139228060Sbapt _occurrences[c] is the number of times that c occurs among the _selchars 140228060Sbapt of a keyword. */ 141228060Sbapt int * _occurrences; 142228060Sbapt /* Value associated with each character. */ 143228060Sbapt int * _asso_values; 144228060Sbapt 145228060Sbaptprivate: 146228060Sbapt 147228060Sbapt /* Length of _head list. Number of keywords, not counting duplicates. */ 148228060Sbapt int _list_len; 149228060Sbapt 150228060Sbapt /* Exclusive upper bound for every _asso_values[c]. A power of 2. */ 151228060Sbapt unsigned int _asso_value_max; 152228060Sbapt 153228060Sbapt /* Initial value for asso_values table. -1 means random. */ 154228060Sbapt int _initial_asso_value; 155228060Sbapt /* Jump length when trying alternative values. 0 means random. */ 156228060Sbapt int _jump; 157228060Sbapt 158228060Sbapt /* Maximal possible hash value. */ 159228060Sbapt int _max_hash_value; 160228060Sbapt 161228060Sbapt /* Sparse bit vector for collision detection. */ 162228060Sbapt Bool_Array * _collision_detector; 163228060Sbapt}; 164228060Sbapt 165228060Sbapt#endif 166