1//
2//  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
3//
4/*
5***************************************************************************
6*   Copyright (C) 2002-2011 International Business Machines Corporation
7*   and others. All rights reserved.
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_BREAK_ITERATION
14
15#include "unicode/unistr.h"
16#include "unicode/uniset.h"
17#include "unicode/uchar.h"
18#include "unicode/parsepos.h"
19
20#include "umutex.h"
21
22#include "rbbirb.h"
23#include "rbbinode.h"
24
25
26//
27//  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
28//                                  when the hash table is deleted.
29//
30U_CDECL_BEGIN
31static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
32    icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
33    delete px;
34}
35U_CDECL_END
36
37
38
39U_NAMESPACE_BEGIN
40
41RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
42    :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
43{
44    fHashTable       = NULL;
45    fCachedSetLookup = NULL;
46
47    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
48    // uhash_open checks status
49    if (U_FAILURE(status)) {
50        return;
51    }
52    uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
53}
54
55
56
57RBBISymbolTable::~RBBISymbolTable()
58{
59    uhash_close(fHashTable);
60}
61
62
63//
//  RBBISymbolTable::lookup       This function from the abstract symbol table interface
65//                                looks up a variable name and returns a UnicodeString
66//                                containing the substitution text.
67//
68//                                The variable name does NOT include the leading $.
69//
70const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
71{
72    RBBISymbolTableEntry  *el;
73    RBBINode              *varRefNode;
74    RBBINode              *exprNode;
75    RBBINode              *usetNode;
76    const UnicodeString   *retString;
77    RBBISymbolTable       *This = (RBBISymbolTable *)this;   // cast off const
78
79    el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
80    if (el == NULL) {
81        return NULL;
82    }
83
84    varRefNode = el->val;
85    exprNode   = varRefNode->fLeftChild;     // Root node of expression for variable
86    if (exprNode->fType == RBBINode::setRef) {
87        // The $variable refers to a single UnicodeSet
88        //   return the ffffString, which will subsequently be interpreted as a
89        //   stand-in character for the set by RBBISymbolTable::lookupMatcher()
90        usetNode = exprNode->fLeftChild;
91        This->fCachedSetLookup = usetNode->fInputSet;
92        retString = &ffffString;
93    }
94    else
95    {
96        // The variable refers to something other than just a set.
97        // return the original source string for the expression
98        retString = &exprNode->fText;
99        This->fCachedSetLookup = NULL;
100    }
101    return retString;
102}
103
104
105
106//
107//  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
108//                                   interface maps a single stand-in character to a
109//                                   pointer to a Unicode Set.   The Unicode Set code uses this
110//                                   mechanism to get all references to the same $variable
111//                                   name to refer to a single common Unicode Set instance.
112//
113//    This implementation cheats a little, and does not maintain a map of stand-in chars
114//    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
115//    constructor will always call this function right after calling lookup(),
116//    and we just need to remember what set to return between these two calls.
117const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
118{
119    UnicodeSet *retVal = NULL;
120    RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
121    if (ch == 0xffff) {
122        retVal = fCachedSetLookup;
123        This->fCachedSetLookup = 0;
124    }
125    return retVal;
126}
127
128//
129// RBBISymbolTable::parseReference   This function from the abstract symbol table interface
130//                                   looks for a $variable name in the source text.
131//                                   It does not look it up, only scans for it.
132//                                   It is used by the UnicodeSet parser.
133//
134//                                   This implementation is lifted pretty much verbatim
135//                                   from the rules based transliterator implementation.
136//                                   I didn't see an obvious way of sharing it.
137//
138UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
139                                                ParsePosition& pos, int32_t limit) const
140{
141    int32_t start = pos.getIndex();
142    int32_t i = start;
143    UnicodeString result;
144    while (i < limit) {
145        UChar c = text.charAt(i);
146        if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
147            break;
148        }
149        ++i;
150    }
151    if (i == start) { // No valid name chars
152        return result; // Indicate failure with empty string
153    }
154    pos.setIndex(i);
155    text.extractBetween(start, i, result);
156    return result;
157}
158
159
160
161//
162// RBBISymbolTable::lookupNode      Given a key (a variable name), return the
163//                                  corresponding RBBI Node.  If there is no entry
164//                                  in the table for this name, return NULL.
165//
166RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
167
168    RBBINode             *retNode = NULL;
169    RBBISymbolTableEntry *el;
170
171    el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
172    if (el != NULL) {
173        retNode = el->val;
174    }
175    return retNode;
176}
177
178
179//
180//    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
181//                                  Indicate an error if the name already exists -
182//                                    this will only occur in the case of duplicate
183//                                    variable assignments.
184//
185void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
186    RBBISymbolTableEntry *e;
187    /* test for buffer overflows */
188    if (U_FAILURE(err)) {
189        return;
190    }
191    e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
192    if (e != NULL) {
193        err = U_BRK_VARIABLE_REDFINITION;
194        return;
195    }
196
197    e = new RBBISymbolTableEntry;
198    if (e == NULL) {
199        err = U_MEMORY_ALLOCATION_ERROR;
200        return;
201    }
202    e->key = key;
203    e->val = val;
204    uhash_put( fHashTable, &e->key, e, &err);
205}
206
207
208RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
209
210RBBISymbolTableEntry::~RBBISymbolTableEntry() {
211    // The "val" of a symbol table entry is a variable reference node.
212    // The l. child of the val is the rhs expression from the assignment.
213    // Unlike other node types, children of variable reference nodes are not
214    //    automatically recursively deleted.  We do it manually here.
215    delete val->fLeftChild;
216    val->fLeftChild = NULL;
217
218    delete  val;
219
220    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
221}
222
223
224//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
226//
227#ifdef RBBI_DEBUG
228void RBBISymbolTable::rbbiSymtablePrint() const {
229    RBBIDebugPrintf("Variable Definitions\n"
230           "Name               Node Val     String Val\n"
231           "----------------------------------------------------------------------\n");
232
233    int32_t pos = -1;
234    const UHashElement  *e   = NULL;
235    for (;;) {
236        e = uhash_nextElement(fHashTable,  &pos);
237        if (e == NULL ) {
238            break;
239        }
240        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
241
242        RBBI_DEBUG_printUnicodeString(s->key, 15);
243        RBBIDebugPrintf("   %8p   ", (void *)s->val);
244        RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
245        RBBIDebugPrintf("\n");
246    }
247
248    RBBIDebugPrintf("\nParsed Variable Definitions\n");
249    pos = -1;
250    for (;;) {
251        e = uhash_nextElement(fHashTable,  &pos);
252        if (e == NULL ) {
253            break;
254        }
255        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
256        RBBI_DEBUG_printUnicodeString(s->key);
257        s->val->fLeftChild->printTree(TRUE);
258        RBBIDebugPrintf("\n");
259    }
260}
261#endif
262
263
264
265
266
267U_NAMESPACE_END
268
269#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
270