1228060Sbapt/* Keyword data. 2228060Sbapt Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. 3228060Sbapt Written by Douglas C. Schmidt <schmidt@ics.uci.edu> 4228060Sbapt and Bruno Haible <bruno@clisp.org>. 5228060Sbapt 6228060Sbapt This file is part of GNU GPERF. 7228060Sbapt 8228060Sbapt GNU GPERF is free software; you can redistribute it and/or modify 9228060Sbapt it under the terms of the GNU General Public License as published by 10228060Sbapt the Free Software Foundation; either version 2, or (at your option) 11228060Sbapt any later version. 12228060Sbapt 13228060Sbapt GNU GPERF is distributed in the hope that it will be useful, 14228060Sbapt but WITHOUT ANY WARRANTY; without even the implied warranty of 15228060Sbapt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16228060Sbapt GNU General Public License for more details. 17228060Sbapt 18228060Sbapt You should have received a copy of the GNU General Public License 19228060Sbapt along with this program; see the file COPYING. 20228060Sbapt If not, write to the Free Software Foundation, Inc., 21228060Sbapt 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 22228060Sbapt 23228060Sbapt/* Specification. */ 24228060Sbapt#include "keyword.h" 25228060Sbapt 26228060Sbapt#include <stddef.h> 27228060Sbapt#include <stdio.h> 28228060Sbapt#include <stdlib.h> 29228060Sbapt#include "positions.h" 30228060Sbapt 31228060Sbapt 32228060Sbapt/* --------------------------- KeywordExt class --------------------------- */ 33228060Sbapt 34228060Sbapt/* Sort a small set of 'unsigned int', base[0..len-1], in place. */ 35228060Sbaptstatic inline void sort_char_set (unsigned int *base, int len) 36228060Sbapt{ 37228060Sbapt /* Bubble sort is sufficient here. */ 38228060Sbapt for (int i = 1; i < len; i++) 39228060Sbapt { 40228060Sbapt int j; 41228060Sbapt unsigned int tmp; 42228060Sbapt 43228060Sbapt for (j = i, tmp = base[j]; j > 0 && tmp < base[j - 1]; j--) 44228060Sbapt base[j] = base[j - 1]; 45228060Sbapt 46228060Sbapt base[j] = tmp; 47228060Sbapt } 48228060Sbapt} 49228060Sbapt 50228060Sbapt/* Initializes selchars and selchars_length. 51228060Sbapt 52228060Sbapt General idea: 53228060Sbapt The hash function will be computed as 54228060Sbapt asso_values[allchars[key_pos[0]]] + 55228060Sbapt asso_values[allchars[key_pos[1]]] + ... 56228060Sbapt We compute selchars as the multiset 57228060Sbapt { allchars[key_pos[0]], allchars[key_pos[1]], ... } 58228060Sbapt so that the hash function becomes 59228060Sbapt asso_values[selchars[0]] + asso_values[selchars[1]] + ... 60228060Sbapt Furthermore we sort the selchars array, to ease detection of duplicates 61228060Sbapt later. 62228060Sbapt 63228060Sbapt More in detail: The arguments alpha_unify (used for case-insensitive 64228060Sbapt hash functions) and alpha_inc (used to disambiguate permutations) 65228060Sbapt apply slight modifications. The hash function will be computed as 66228060Sbapt sum (j=0,1,...: k = key_pos[j]: 67228060Sbapt asso_values[alpha_unify[allchars[k]+alpha_inc[k]]]) 68228060Sbapt + (allchars_length if !option[NOLENGTH], 0 otherwise). 69228060Sbapt We compute selchars as the multiset 70228060Sbapt { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] } 71228060Sbapt so that the hash function becomes 72228060Sbapt asso_values[selchars[0]] + asso_values[selchars[1]] + ... 73228060Sbapt + (allchars_length if !option[NOLENGTH], 0 otherwise). 74228060Sbapt */ 75228060Sbapt 76228060Sbaptunsigned int * 77228060SbaptKeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) 78228060Sbapt{ 79228060Sbapt /* Iterate through the list of positions, initializing selchars 80228060Sbapt (via ptr). */ 81228060Sbapt PositionIterator iter = positions.iterator(_allchars_length); 82228060Sbapt 83228060Sbapt unsigned int *key_set = new unsigned int[iter.remaining()]; 84228060Sbapt unsigned int *ptr = key_set; 85228060Sbapt 86228060Sbapt for (int i; (i = iter.next ()) != PositionIterator::EOS; ) 87228060Sbapt { 88228060Sbapt unsigned int c; 89228060Sbapt if (i == Positions::LASTCHAR) 90228060Sbapt /* Special notation for last KEY position, i.e. '$'. */ 91228060Sbapt c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); 92228060Sbapt else if (i < _allchars_length) 93228060Sbapt { 94228060Sbapt /* Within range of KEY length, so we'll keep it. */ 95228060Sbapt c = static_cast<unsigned char>(_allchars[i]); 96228060Sbapt if (alpha_inc) 97228060Sbapt c += alpha_inc[i]; 98228060Sbapt } 99228060Sbapt else 100228060Sbapt /* Out of range of KEY length, the iterator should not have 101228060Sbapt produced this. */ 102228060Sbapt abort (); 103228060Sbapt if (alpha_unify) 104228060Sbapt c = alpha_unify[c]; 105228060Sbapt *ptr = c; 106228060Sbapt ptr++; 107228060Sbapt } 108228060Sbapt 109228060Sbapt _selchars = key_set; 110228060Sbapt _selchars_length = ptr - key_set; 111228060Sbapt 112228060Sbapt return key_set; 113228060Sbapt} 114228060Sbapt 115228060Sbaptvoid 116228060SbaptKeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) 117228060Sbapt{ 118228060Sbapt init_selchars_low (positions, alpha_unify, NULL); 119228060Sbapt} 120228060Sbapt 121228060Sbaptvoid 122228060SbaptKeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) 123228060Sbapt{ 124228060Sbapt unsigned int *selchars = 125228060Sbapt init_selchars_low (positions, alpha_unify, alpha_inc); 126228060Sbapt 127228060Sbapt /* Sort the selchars elements alphabetically. */ 128228060Sbapt sort_char_set (selchars, _selchars_length); 129228060Sbapt} 130228060Sbapt 131228060Sbapt/* Deletes selchars. */ 132228060Sbaptvoid 133228060SbaptKeywordExt::delete_selchars () 134228060Sbapt{ 135228060Sbapt delete[] const_cast<unsigned int *>(_selchars); 136228060Sbapt} 137228060Sbapt 138228060Sbapt 139228060Sbapt/* ------------------------- Keyword_Factory class ------------------------- */ 140228060Sbapt 141228060SbaptKeyword_Factory::Keyword_Factory () 142228060Sbapt{ 143228060Sbapt} 144228060Sbapt 145228060SbaptKeyword_Factory::~Keyword_Factory () 146228060Sbapt{ 147228060Sbapt} 148228060Sbapt 149228060Sbapt 150228060Sbapt/* ------------------------------------------------------------------------- */ 151228060Sbapt 152228060Sbaptchar empty_string[1] = ""; 153228060Sbapt 154228060Sbapt 155228060Sbapt#ifndef __OPTIMIZE__ 156228060Sbapt 157228060Sbapt#define INLINE /* not inline */ 158228060Sbapt#include "keyword.icc" 159228060Sbapt#undef INLINE 160228060Sbapt 161228060Sbapt#endif /* not defined __OPTIMIZE__ */ 162