1/*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package build.tools.generatebreakiteratordata;
27
28import java.util.Hashtable;
29import java.util.Vector;
30
31/**
32 * The Builder class for DictionaryBasedBreakIterator inherits almost all of
33 * its functionality from RuleBasedBreakIteratorBuilder, but extends it with
34 * extra logic to handle the "<dictionary>" token.
35 */
36class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder {
37
38    /**
39     * A list of flags indicating which character categories are contained in
40     * the dictionary file (this is used to determine which ranges of characters
41     * to apply the dictionary to)
42     */
43    private boolean[] categoryFlags;
44
45    /**
46     * A CharSet that contains all the characters represented in the dictionary
47     */
48    private CharSet dictionaryChars = new CharSet();
49    private String dictionaryExpression = "";
50
51    public DictionaryBasedBreakIteratorBuilder(String description) {
52        super(description);
53    }
54
55    /**
56     * We override handleSpecialSubstitution() to add logic to handle
57     * the <dictionary> tag.  If we see a substitution named "<dictionary>",
58     * parse the substitution expression and store the result in
59     * dictionaryChars.
60     */
61    protected void handleSpecialSubstitution(String replace, String replaceWith,
62                                             int startPos, String description) {
63        super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
64
65        if (replace.equals("<dictionary>")) {
66            if (replaceWith.charAt(0) == '(') {
67                error("Dictionary group can't be enclosed in (", startPos, description);
68            }
69            dictionaryExpression = replaceWith;
70            dictionaryChars = CharSet.parseString(replaceWith);
71        }
72    }
73
74    /**
75     * The other half of the logic to handle the dictionary characters happens
76     * here. After the inherited builder has derived the real character
77     * categories, we set up the categoryFlags array in the iterator. This array
78     * contains "true" for every character category that includes a dictionary
79     * character.
80     */
81    protected void buildCharCategories(Vector<String> tempRuleList) {
82        super.buildCharCategories(tempRuleList);
83
84        categoryFlags = new boolean[categories.size()];
85        for (int i = 0; i < categories.size(); i++) {
86            CharSet cs = categories.elementAt(i);
87            if (!(cs.intersection(dictionaryChars).empty())) {
88                categoryFlags[i] = true;
89            }
90        }
91    }
92
93    // This function is actually called by
94    // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by
95    // the function above. This gives us a way to create a separate character
96    // category for the dictionary characters even when
97    // RuleBasedBreakIteratorBuilder isn't making a distinction.
98    protected void mungeExpressionList(Hashtable<String, Object> expressions) {
99        expressions.put(dictionaryExpression, dictionaryChars);
100    }
101
102    void makeFile(String filename) {
103        super.setAdditionalData(super.toByteArray(categoryFlags));
104        super.makeFile(filename);
105    }
106}
107