1228060Sbapt/* This may look like C code, but it is really -*- C++ -*- */
2228060Sbapt
3228060Sbapt/* Search algorithm.
4228060Sbapt
5228060Sbapt   Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc.
6228060Sbapt   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
7228060Sbapt   and Bruno Haible <bruno@clisp.org>.
8228060Sbapt
9228060Sbapt   This file is part of GNU GPERF.
10228060Sbapt
11228060Sbapt   GNU GPERF is free software; you can redistribute it and/or modify
12228060Sbapt   it under the terms of the GNU General Public License as published by
13228060Sbapt   the Free Software Foundation; either version 2, or (at your option)
14228060Sbapt   any later version.
15228060Sbapt
16228060Sbapt   GNU GPERF is distributed in the hope that it will be useful,
17228060Sbapt   but WITHOUT ANY WARRANTY; without even the implied warranty of
18228060Sbapt   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19228060Sbapt   GNU General Public License for more details.
20228060Sbapt
21228060Sbapt   You should have received a copy of the GNU General Public License
22228060Sbapt   along with this program; see the file COPYING.
23228060Sbapt   If not, write to the Free Software Foundation, Inc.,
24228060Sbapt   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
25228060Sbapt
26228060Sbapt#ifndef search_h
27228060Sbapt#define search_h 1
28228060Sbapt
29228060Sbapt#include "keyword-list.h"
30228060Sbapt#include "positions.h"
31228060Sbapt#include "bool-array.h"
32228060Sbapt
33228060Sbaptstruct EquivalenceClass;
34228060Sbapt
35228060Sbaptclass Search
36228060Sbapt{
37228060Sbaptpublic:
38228060Sbapt                        Search (KeywordExt_List *list);
39228060Sbapt                        ~Search ();
40228060Sbapt  void                  optimize ();
41228060Sbaptprivate:
42228060Sbapt  void                  prepare ();
43228060Sbapt
44228060Sbapt  /* Computes the upper bound on the indices passed to asso_values[],
45228060Sbapt     assuming no alpha_increments.  */
46228060Sbapt  unsigned int          compute_alpha_size () const;
47228060Sbapt
48228060Sbapt  /* Computes the unification rules between different asso_values[c],
49228060Sbapt     assuming no alpha_increments.  */
50228060Sbapt  unsigned int *        compute_alpha_unify () const;
51228060Sbapt
52228060Sbapt  /* Initializes each keyword's _selchars array.  */
53228060Sbapt  void                  init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
54228060Sbapt  /* Deletes each keyword's _selchars array.  */
55228060Sbapt  void                  delete_selchars () const;
56228060Sbapt
57228060Sbapt  /* Count the duplicate keywords that occur with a given set of positions.  */
58228060Sbapt  unsigned int          count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
59228060Sbapt
60228060Sbapt  /* Find good key positions.  */
61228060Sbapt  void                  find_positions ();
62228060Sbapt
63228060Sbapt  /* Count the duplicate keywords that occur with the found set of positions.  */
64228060Sbapt  unsigned int          count_duplicates_tuple () const;
65228060Sbapt
66228060Sbapt  /* Computes the upper bound on the indices passed to asso_values[].  */
67228060Sbapt  unsigned int          compute_alpha_size (const unsigned int *alpha_inc) const;
68228060Sbapt
69228060Sbapt  /* Computes the unification rules between different asso_values[c].  */
70228060Sbapt  unsigned int *        compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
71228060Sbapt
72228060Sbapt  /* Initializes each keyword's _selchars array.  */
73228060Sbapt  void                  init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
74228060Sbapt
75228060Sbapt  /* Count the duplicate keywords that occur with the given set of positions
76228060Sbapt     and a given alpha_inc[] array.  */
77228060Sbapt  unsigned int          count_duplicates_multiset (const unsigned int *alpha_inc) const;
78228060Sbapt
79228060Sbapt  /* Find good _alpha_inc[].  */
80228060Sbapt  void                  find_alpha_inc ();
81228060Sbapt
82228060Sbapt  /* Initializes the asso_values[] related parameters.  */
83228060Sbapt  void                  prepare_asso_values ();
84228060Sbapt
85228060Sbapt  EquivalenceClass *    compute_partition (bool *undetermined) const;
86228060Sbapt
87228060Sbapt  unsigned int          count_possible_collisions (EquivalenceClass *partition, unsigned int c) const;
88228060Sbapt
89228060Sbapt  bool                  unchanged_partition (EquivalenceClass *partition, unsigned int c) const;
90228060Sbapt
91228060Sbapt  /* Finds some _asso_values[] that fit.  */
92228060Sbapt  void                  find_asso_values ();
93228060Sbapt
94228060Sbapt  /* Computes a keyword's hash value, relative to the current _asso_values[],
95228060Sbapt     and stores it in keyword->_hash_value.  */
96228060Sbapt  int                   compute_hash (KeywordExt *keyword) const;
97228060Sbapt
98228060Sbapt  /* Finds good _asso_values[].  */
99228060Sbapt  void                  find_good_asso_values ();
100228060Sbapt
101228060Sbapt  /* Sorts the keyword list by hash value.  */
102228060Sbapt  void                  sort ();
103228060Sbapt
104228060Sbaptpublic:
105228060Sbapt
106228060Sbapt  /* Linked list of keywords.  */
107228060Sbapt  KeywordExt_List *     _head;
108228060Sbapt
109228060Sbapt  /* Total number of keywords, counting duplicates.  */
110228060Sbapt  int                   _total_keys;
111228060Sbapt
112228060Sbapt  /* Maximum length of the longest keyword.  */
113228060Sbapt  int                   _max_key_len;
114228060Sbapt
115228060Sbapt  /* Minimum length of the shortest keyword.  */
116228060Sbapt  int                   _min_key_len;
117228060Sbapt
118228060Sbapt  /* User-specified or computed key positions.  */
119228060Sbapt  Positions             _key_positions;
120228060Sbapt
121228060Sbapt  /* Adjustments to add to bytes add specific key positions.  */
122228060Sbapt  unsigned int *        _alpha_inc;
123228060Sbapt
124228060Sbapt  /* Size of alphabet.  */
125228060Sbapt  unsigned int          _alpha_size;
126228060Sbapt
127228060Sbapt  /* Alphabet character unification, either the identity or a mapping from
128228060Sbapt     upper case characters to lower case characters (and maybe more).  */
129228060Sbapt  unsigned int *        _alpha_unify;
130228060Sbapt
131228060Sbapt  /* Maximum _selchars_length over all keywords.  */
132228060Sbapt  unsigned int          _max_selchars_length;
133228060Sbapt
134228060Sbapt  /* Total number of duplicates that have been moved to _duplicate_link lists
135228060Sbapt     (not counting their representatives which stay on the main list).  */
136228060Sbapt  int                   _total_duplicates;
137228060Sbapt
138228060Sbapt  /* Counts occurrences of each key set character.
139228060Sbapt     _occurrences[c] is the number of times that c occurs among the _selchars
140228060Sbapt     of a keyword.  */
141228060Sbapt  int *                 _occurrences;
142228060Sbapt  /* Value associated with each character. */
143228060Sbapt  int *                 _asso_values;
144228060Sbapt
145228060Sbaptprivate:
146228060Sbapt
147228060Sbapt  /* Length of _head list.  Number of keywords, not counting duplicates.  */
148228060Sbapt  int                   _list_len;
149228060Sbapt
150228060Sbapt  /* Exclusive upper bound for every _asso_values[c].  A power of 2.  */
151228060Sbapt  unsigned int          _asso_value_max;
152228060Sbapt
153228060Sbapt  /* Initial value for asso_values table.  -1 means random.  */
154228060Sbapt  int                   _initial_asso_value;
155228060Sbapt  /* Jump length when trying alternative values.  0 means random.  */
156228060Sbapt  int                   _jump;
157228060Sbapt
158228060Sbapt  /* Maximal possible hash value.  */
159228060Sbapt  int                   _max_hash_value;
160228060Sbapt
161228060Sbapt  /* Sparse bit vector for collision detection.  */
162228060Sbapt  Bool_Array *          _collision_detector;
163228060Sbapt};
164228060Sbapt
165228060Sbapt#endif
166