1/* This may look like C code, but it is really -*- C++ -*- */
2
3/* Search algorithm.
4
5   Copyright (C) 1989-1998, 2000, 2002, 2009 Free Software Foundation, Inc.
6   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
7   and Bruno Haible <bruno@clisp.org>.
8
9   This file is part of GNU GPERF.
10
11   This program is free software: you can redistribute it and/or modify
12   it under the terms of the GNU General Public License as published by
13   the Free Software Foundation; either version 3 of the License, or
14   (at your option) any later version.
15
16   This program is distributed in the hope that it will be useful,
17   but WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   GNU General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
23
24#ifndef search_h
25#define search_h 1
26
27#include "keyword-list.h"
28#include "positions.h"
29#include "bool-array.h"
30
31struct EquivalenceClass;
32
33class Search
34{
35public:
36                        Search (KeywordExt_List *list);
37                        ~Search ();
38  void                  optimize ();
39private:
40  void                  prepare ();
41
42  /* Computes the upper bound on the indices passed to asso_values[],
43     assuming no alpha_increments.  */
44  unsigned int          compute_alpha_size () const;
45
46  /* Computes the unification rules between different asso_values[c],
47     assuming no alpha_increments.  */
48  unsigned int *        compute_alpha_unify () const;
49
50  /* Initializes each keyword's _selchars array.  */
51  void                  init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
52  /* Deletes each keyword's _selchars array.  */
53  void                  delete_selchars () const;
54
55  /* Count the duplicate keywords that occur with a given set of positions.  */
56  unsigned int          count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
57
58  /* Find good key positions.  */
59  void                  find_positions ();
60
61  /* Count the duplicate keywords that occur with the found set of positions.  */
62  unsigned int          count_duplicates_tuple () const;
63
64  /* Computes the upper bound on the indices passed to asso_values[].  */
65  unsigned int          compute_alpha_size (const unsigned int *alpha_inc) const;
66
67  /* Computes the unification rules between different asso_values[c].  */
68  unsigned int *        compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
69
70  /* Initializes each keyword's _selchars array.  */
71  void                  init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
72
73  /* Count the duplicate keywords that occur with the given set of positions
74     and a given alpha_inc[] array.  */
75  unsigned int          count_duplicates_multiset (const unsigned int *alpha_inc) const;
76
77  /* Find good _alpha_inc[].  */
78  void                  find_alpha_inc ();
79
80  /* Initializes the asso_values[] related parameters.  */
81  void                  prepare_asso_values ();
82
83  EquivalenceClass *    compute_partition (bool *undetermined) const;
84
85  unsigned int          count_possible_collisions (EquivalenceClass *partition, unsigned int c) const;
86
87  bool                  unchanged_partition (EquivalenceClass *partition, unsigned int c) const;
88
89  /* Finds some _asso_values[] that fit.  */
90  void                  find_asso_values ();
91
92  /* Computes a keyword's hash value, relative to the current _asso_values[],
93     and stores it in keyword->_hash_value.  */
94  int                   compute_hash (KeywordExt *keyword) const;
95
96  /* Finds good _asso_values[].  */
97  void                  find_good_asso_values ();
98
99  /* Sorts the keyword list by hash value.  */
100  void                  sort ();
101
102public:
103
104  /* Linked list of keywords.  */
105  KeywordExt_List *     _head;
106
107  /* Total number of keywords, counting duplicates.  */
108  int                   _total_keys;
109
110  /* Maximum length of the longest keyword.  */
111  int                   _max_key_len;
112
113  /* Minimum length of the shortest keyword.  */
114  int                   _min_key_len;
115
116  /* Whether the hash function includes the length.  */
117  bool                  _hash_includes_len;
118
119  /* User-specified or computed key positions.  */
120  Positions             _key_positions;
121
122  /* Adjustments to add to bytes add specific key positions.  */
123  unsigned int *        _alpha_inc;
124
125  /* Size of alphabet.  */
126  unsigned int          _alpha_size;
127
128  /* Alphabet character unification, either the identity or a mapping from
129     upper case characters to lower case characters (and maybe more).  */
130  unsigned int *        _alpha_unify;
131
132  /* Maximum _selchars_length over all keywords.  */
133  unsigned int          _max_selchars_length;
134
135  /* Total number of duplicates that have been moved to _duplicate_link lists
136     (not counting their representatives which stay on the main list).  */
137  int                   _total_duplicates;
138
139  /* Counts occurrences of each key set character.
140     _occurrences[c] is the number of times that c occurs among the _selchars
141     of a keyword.  */
142  int *                 _occurrences;
143  /* Value associated with each character. */
144  int *                 _asso_values;
145
146private:
147
148  /* Length of _head list.  Number of keywords, not counting duplicates.  */
149  int                   _list_len;
150
151  /* Exclusive upper bound for every _asso_values[c].  A power of 2.  */
152  unsigned int          _asso_value_max;
153
154  /* Initial value for asso_values table.  -1 means random.  */
155  int                   _initial_asso_value;
156  /* Jump length when trying alternative values.  0 means random.  */
157  int                   _jump;
158
159  /* Maximal possible hash value.  */
160  int                   _max_hash_value;
161
162  /* Sparse bit vector for collision detection.  */
163  Bool_Array *          _collision_detector;
164};
165
166#endif
167