1228060Sbapt/* Keyword data.
2228060Sbapt   Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc.
3228060Sbapt   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
4228060Sbapt   and Bruno Haible <bruno@clisp.org>.
5228060Sbapt
6228060Sbapt   This file is part of GNU GPERF.
7228060Sbapt
8228060Sbapt   GNU GPERF is free software; you can redistribute it and/or modify
9228060Sbapt   it under the terms of the GNU General Public License as published by
10228060Sbapt   the Free Software Foundation; either version 2, or (at your option)
11228060Sbapt   any later version.
12228060Sbapt
13228060Sbapt   GNU GPERF is distributed in the hope that it will be useful,
14228060Sbapt   but WITHOUT ANY WARRANTY; without even the implied warranty of
15228060Sbapt   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16228060Sbapt   GNU General Public License for more details.
17228060Sbapt
18228060Sbapt   You should have received a copy of the GNU General Public License
19228060Sbapt   along with this program; see the file COPYING.
20228060Sbapt   If not, write to the Free Software Foundation, Inc.,
21228060Sbapt   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
22228060Sbapt
23228060Sbapt/* Specification. */
24228060Sbapt#include "keyword.h"
25228060Sbapt
26228060Sbapt#include <stddef.h>
27228060Sbapt#include <stdio.h>
28228060Sbapt#include <stdlib.h>
29228060Sbapt#include "positions.h"
30228060Sbapt
31228060Sbapt
32228060Sbapt/* --------------------------- KeywordExt class --------------------------- */
33228060Sbapt
34228060Sbapt/* Sort a small set of 'unsigned int', base[0..len-1], in place.  */
35228060Sbaptstatic inline void sort_char_set (unsigned int *base, int len)
36228060Sbapt{
37228060Sbapt  /* Bubble sort is sufficient here.  */
38228060Sbapt  for (int i = 1; i < len; i++)
39228060Sbapt    {
40228060Sbapt      int j;
41228060Sbapt      unsigned int tmp;
42228060Sbapt
43228060Sbapt      for (j = i, tmp = base[j]; j > 0 && tmp < base[j - 1]; j--)
44228060Sbapt        base[j] = base[j - 1];
45228060Sbapt
46228060Sbapt      base[j] = tmp;
47228060Sbapt    }
48228060Sbapt}
49228060Sbapt
50228060Sbapt/* Initializes selchars and selchars_length.
51228060Sbapt
52228060Sbapt   General idea:
53228060Sbapt     The hash function will be computed as
54228060Sbapt         asso_values[allchars[key_pos[0]]] +
55228060Sbapt         asso_values[allchars[key_pos[1]]] + ...
56228060Sbapt     We compute selchars as the multiset
57228060Sbapt         { allchars[key_pos[0]], allchars[key_pos[1]], ... }
58228060Sbapt     so that the hash function becomes
59228060Sbapt         asso_values[selchars[0]] + asso_values[selchars[1]] + ...
60228060Sbapt   Furthermore we sort the selchars array, to ease detection of duplicates
61228060Sbapt   later.
62228060Sbapt
63228060Sbapt   More in detail: The arguments alpha_unify (used for case-insensitive
64228060Sbapt   hash functions) and alpha_inc (used to disambiguate permutations)
65228060Sbapt   apply slight modifications. The hash function will be computed as
66228060Sbapt       sum (j=0,1,...: k = key_pos[j]:
67228060Sbapt            asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
68228060Sbapt       + (allchars_length if !option[NOLENGTH], 0 otherwise).
69228060Sbapt   We compute selchars as the multiset
70228060Sbapt       { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
71228060Sbapt   so that the hash function becomes
72228060Sbapt       asso_values[selchars[0]] + asso_values[selchars[1]] + ...
73228060Sbapt       + (allchars_length if !option[NOLENGTH], 0 otherwise).
74228060Sbapt */
75228060Sbapt
76228060Sbaptunsigned int *
77228060SbaptKeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
78228060Sbapt{
79228060Sbapt  /* Iterate through the list of positions, initializing selchars
80228060Sbapt     (via ptr).  */
81228060Sbapt  PositionIterator iter = positions.iterator(_allchars_length);
82228060Sbapt
83228060Sbapt  unsigned int *key_set = new unsigned int[iter.remaining()];
84228060Sbapt  unsigned int *ptr = key_set;
85228060Sbapt
86228060Sbapt  for (int i; (i = iter.next ()) != PositionIterator::EOS; )
87228060Sbapt    {
88228060Sbapt      unsigned int c;
89228060Sbapt      if (i == Positions::LASTCHAR)
90228060Sbapt        /* Special notation for last KEY position, i.e. '$'.  */
91228060Sbapt        c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
92228060Sbapt      else if (i < _allchars_length)
93228060Sbapt        {
94228060Sbapt          /* Within range of KEY length, so we'll keep it.  */
95228060Sbapt          c = static_cast<unsigned char>(_allchars[i]);
96228060Sbapt          if (alpha_inc)
97228060Sbapt            c += alpha_inc[i];
98228060Sbapt        }
99228060Sbapt      else
100228060Sbapt        /* Out of range of KEY length, the iterator should not have
101228060Sbapt           produced this.  */
102228060Sbapt        abort ();
103228060Sbapt      if (alpha_unify)
104228060Sbapt        c = alpha_unify[c];
105228060Sbapt      *ptr = c;
106228060Sbapt      ptr++;
107228060Sbapt    }
108228060Sbapt
109228060Sbapt  _selchars = key_set;
110228060Sbapt  _selchars_length = ptr - key_set;
111228060Sbapt
112228060Sbapt  return key_set;
113228060Sbapt}
114228060Sbapt
115228060Sbaptvoid
116228060SbaptKeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
117228060Sbapt{
118228060Sbapt  init_selchars_low (positions, alpha_unify, NULL);
119228060Sbapt}
120228060Sbapt
121228060Sbaptvoid
122228060SbaptKeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
123228060Sbapt{
124228060Sbapt  unsigned int *selchars =
125228060Sbapt    init_selchars_low (positions, alpha_unify, alpha_inc);
126228060Sbapt
127228060Sbapt  /* Sort the selchars elements alphabetically.  */
128228060Sbapt  sort_char_set (selchars, _selchars_length);
129228060Sbapt}
130228060Sbapt
131228060Sbapt/* Deletes selchars.  */
132228060Sbaptvoid
133228060SbaptKeywordExt::delete_selchars ()
134228060Sbapt{
135228060Sbapt  delete[] const_cast<unsigned int *>(_selchars);
136228060Sbapt}
137228060Sbapt
138228060Sbapt
139228060Sbapt/* ------------------------- Keyword_Factory class ------------------------- */
140228060Sbapt
141228060SbaptKeyword_Factory::Keyword_Factory ()
142228060Sbapt{
143228060Sbapt}
144228060Sbapt
145228060SbaptKeyword_Factory::~Keyword_Factory ()
146228060Sbapt{
147228060Sbapt}
148228060Sbapt
149228060Sbapt
150228060Sbapt/* ------------------------------------------------------------------------- */
151228060Sbapt
152228060Sbaptchar empty_string[1] = "";
153228060Sbapt
154228060Sbapt
155228060Sbapt#ifndef __OPTIMIZE__
156228060Sbapt
157228060Sbapt#define INLINE /* not inline */
158228060Sbapt#include "keyword.icc"
159228060Sbapt#undef INLINE
160228060Sbapt
161228060Sbapt#endif /* not defined __OPTIMIZE__ */
162