1/* 2****************************************************************************** 3* 4* Copyright (C) 2008-2011, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* file name: uspoof_conf.h 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2009Jan05 14* created by: Andy Heninger 15* 16* Internal classes for compiling confusable data into its binary (runtime) form. 17*/ 18 19#ifndef __USPOOF_BUILDCONF_H__ 20#define __USPOOF_BUILDCONF_H__ 21 22#if !UCONFIG_NO_NORMALIZATION 23 24#if !UCONFIG_NO_REGULAR_EXPRESSIONS 25 26#include "uspoof_impl.h" 27 28U_NAMESPACE_BEGIN 29 30// SPUString 31// Holds a string that is the result of one of the mappings defined 32// by the confusable mapping data (confusables.txt from Unicode.org) 33// Instances of SPUString exist during the compilation process only. 34 35struct SPUString : public UMemory { 36 UnicodeString *fStr; // The actual string. 37 int32_t fStrTableIndex; // Index into the final runtime data for this string. 38 // (or, for length 1, the single string char itself, 39 // there being no string table entry for it.) 40 SPUString(UnicodeString *s); 41 ~SPUString(); 42}; 43 44 45// String Pool A utility class for holding the strings that are the result of 46// the spoof mappings. These strings will utimately end up in the 47// run-time String Table. 48// This is sort of like a sorted set of strings, except that ICU's anemic 49// built-in collections don't support those, so it is implemented with a 50// combination of a uhash and a UVector. 51 52 53class SPUStringPool : public UMemory { 54 public: 55 SPUStringPool(UErrorCode &status); 56 ~SPUStringPool(); 57 58 // Add a string. Return the string from the table. 59 // If the input parameter string is already in the table, delete the 60 // input parameter and return the existing string. 61 SPUString *addString(UnicodeString *src, UErrorCode &status); 62 63 64 // Get the n-th string in the collection. 65 SPUString *getByIndex(int32_t i); 66 67 // Sort the contents; affects the ordering of getByIndex(). 68 void sort(UErrorCode &status); 69 70 int32_t size(); 71 72 private: 73 UVector *fVec; // Elements are SPUString * 74 UHashtable *fHash; // Key: UnicodeString Value: SPUString 75}; 76 77 78// class ConfusabledataBuilder 79// An instance of this class exists while the confusable data is being built from source. 80// It encapsulates the intermediate data structures that are used for building. 81// It exports one static function, to do a confusable data build. 82 83class ConfusabledataBuilder : public UMemory { 84 private: 85 SpoofImpl *fSpoofImpl; 86 UChar *fInput; 87 UHashtable *fSLTable; 88 UHashtable *fSATable; 89 UHashtable *fMLTable; 90 UHashtable *fMATable; 91 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. 92 93 // The binary data is first assembled into the following four collections, then 94 // copied to its final raw-memory destination. 95 UVector *fKeyVec; 96 UVector *fValueVec; 97 UnicodeString *fStringTable; 98 UVector *fStringLengthsTable; 99 100 SPUStringPool *stringPool; 101 URegularExpression *fParseLine; 102 URegularExpression *fParseHexNum; 103 int32_t fLineNum; 104 105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); 106 ~ConfusabledataBuilder(); 107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); 108 109 // Add an entry to the key and value tables being built 110 // input: data from SLTable, MATable, etc. 111 // outut: entry added to fKeyVec and fValueVec 112 void addKeyEntry(UChar32 keyChar, // The key character 113 UHashtable *table, // The table, one of SATable, MATable, etc. 114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. 115 UErrorCode &status); 116 117 // From an index into fKeyVec & fValueVec 118 // get a UnicodeString with the corresponding mapping. 119 UnicodeString getMapping(int32_t index); 120 121 // Populate the final binary output data array with the compiled data. 122 void outputData(UErrorCode &status); 123 124 public: 125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, 126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); 127}; 128U_NAMESPACE_END 129 130#endif 131#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 132#endif // __USPOOF_BUILDCONF_H__ 133