keyword.cc revision 230237
1/* Keyword data. 2 Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. 3 Written by Douglas C. Schmidt <schmidt@ics.uci.edu> 4 and Bruno Haible <bruno@clisp.org>. 5 6 This file is part of GNU GPERF. 7 8 GNU GPERF is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 GNU GPERF is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; see the file COPYING. 20 If not, write to the Free Software Foundation, Inc., 21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23/* Specification. */ 24#include "keyword.h" 25 26#include <stddef.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include "positions.h" 30 31 32/* --------------------------- KeywordExt class --------------------------- */ 33 34/* Sort a small set of 'unsigned int', base[0..len-1], in place. */ 35static inline void sort_char_set (unsigned int *base, int len) 36{ 37 /* Bubble sort is sufficient here. */ 38 for (int i = 1; i < len; i++) 39 { 40 int j; 41 unsigned int tmp; 42 43 for (j = i, tmp = base[j]; j > 0 && tmp < base[j - 1]; j--) 44 base[j] = base[j - 1]; 45 46 base[j] = tmp; 47 } 48} 49 50/* Initializes selchars and selchars_length. 51 52 General idea: 53 The hash function will be computed as 54 asso_values[allchars[key_pos[0]]] + 55 asso_values[allchars[key_pos[1]]] + ... 56 We compute selchars as the multiset 57 { allchars[key_pos[0]], allchars[key_pos[1]], ... } 58 so that the hash function becomes 59 asso_values[selchars[0]] + asso_values[selchars[1]] + ... 60 Furthermore we sort the selchars array, to ease detection of duplicates 61 later. 62 63 More in detail: The arguments alpha_unify (used for case-insensitive 64 hash functions) and alpha_inc (used to disambiguate permutations) 65 apply slight modifications. The hash function will be computed as 66 sum (j=0,1,...: k = key_pos[j]: 67 asso_values[alpha_unify[allchars[k]+alpha_inc[k]]]) 68 + (allchars_length if !option[NOLENGTH], 0 otherwise). 69 We compute selchars as the multiset 70 { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] } 71 so that the hash function becomes 72 asso_values[selchars[0]] + asso_values[selchars[1]] + ... 73 + (allchars_length if !option[NOLENGTH], 0 otherwise). 74 */ 75 76unsigned int * 77KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) 78{ 79 /* Iterate through the list of positions, initializing selchars 80 (via ptr). */ 81 PositionIterator iter = positions.iterator(_allchars_length); 82 83 unsigned int *key_set = new unsigned int[iter.remaining()]; 84 unsigned int *ptr = key_set; 85 86 for (int i; (i = iter.next ()) != PositionIterator::EOS; ) 87 { 88 unsigned int c; 89 if (i == Positions::LASTCHAR) 90 /* Special notation for last KEY position, i.e. '$'. */ 91 c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); 92 else if (i < _allchars_length) 93 { 94 /* Within range of KEY length, so we'll keep it. */ 95 c = static_cast<unsigned char>(_allchars[i]); 96 if (alpha_inc) 97 c += alpha_inc[i]; 98 } 99 else 100 /* Out of range of KEY length, the iterator should not have 101 produced this. */ 102 abort (); 103 if (alpha_unify) 104 c = alpha_unify[c]; 105 *ptr = c; 106 ptr++; 107 } 108 109 _selchars = key_set; 110 _selchars_length = ptr - key_set; 111 112 return key_set; 113} 114 115void 116KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) 117{ 118 init_selchars_low (positions, alpha_unify, NULL); 119} 120 121void 122KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) 123{ 124 unsigned int *selchars = 125 init_selchars_low (positions, alpha_unify, alpha_inc); 126 127 /* Sort the selchars elements alphabetically. */ 128 sort_char_set (selchars, _selchars_length); 129} 130 131/* Deletes selchars. */ 132void 133KeywordExt::delete_selchars () 134{ 135 delete[] const_cast<unsigned int *>(_selchars); 136} 137 138 139/* ------------------------- Keyword_Factory class ------------------------- */ 140 141Keyword_Factory::Keyword_Factory () 142{ 143} 144 145Keyword_Factory::~Keyword_Factory () 146{ 147} 148 149 150/* ------------------------------------------------------------------------- */ 151 152char empty_string[1] = ""; 153 154 155#ifndef __OPTIMIZE__ 156 157#define INLINE /* not inline */ 158#include "keyword.icc" 159#undef INLINE 160 161#endif /* not defined __OPTIMIZE__ */ 162