DictionaryBasedBreakIteratorBuilder.java revision 8845:4be14673b9bf
1/* 2 * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package build.tools.generatebreakiteratordata; 27 28import java.util.Hashtable; 29import java.util.Vector; 30 31/** 32 * The Builder class for DictionaryBasedBreakIterator inherits almost all of 33 * its functionality from RuleBasedBreakIteratorBuilder, but extends it with 34 * extra logic to handle the "<dictionary>" token. 35 */ 36class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder { 37 38 /** 39 * A list of flags indicating which character categories are contained in 40 * the dictionary file (this is used to determine which ranges of characters 41 * to apply the dictionary to) 42 */ 43 private boolean[] categoryFlags; 44 45 /** 46 * A CharSet that contains all the characters represented in the dictionary 47 */ 48 private CharSet dictionaryChars = new CharSet(); 49 private String dictionaryExpression = ""; 50 51 public DictionaryBasedBreakIteratorBuilder(String description) { 52 super(description); 53 } 54 55 /** 56 * We override handleSpecialSubstitution() to add logic to handle 57 * the <dictionary> tag. If we see a substitution named "<dictionary>", 58 * parse the substitution expression and store the result in 59 * dictionaryChars. 60 */ 61 protected void handleSpecialSubstitution(String replace, String replaceWith, 62 int startPos, String description) { 63 super.handleSpecialSubstitution(replace, replaceWith, startPos, description); 64 65 if (replace.equals("<dictionary>")) { 66 if (replaceWith.charAt(0) == '(') { 67 error("Dictionary group can't be enclosed in (", startPos, description); 68 } 69 dictionaryExpression = replaceWith; 70 dictionaryChars = CharSet.parseString(replaceWith); 71 } 72 } 73 74 /** 75 * The other half of the logic to handle the dictionary characters happens 76 * here. After the inherited builder has derived the real character 77 * categories, we set up the categoryFlags array in the iterator. This array 78 * contains "true" for every character category that includes a dictionary 79 * character. 80 */ 81 protected void buildCharCategories(Vector<String> tempRuleList) { 82 super.buildCharCategories(tempRuleList); 83 84 categoryFlags = new boolean[categories.size()]; 85 for (int i = 0; i < categories.size(); i++) { 86 CharSet cs = categories.elementAt(i); 87 if (!(cs.intersection(dictionaryChars).empty())) { 88 categoryFlags[i] = true; 89 } 90 } 91 } 92 93 // This function is actually called by 94 // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by 95 // the function above. This gives us a way to create a separate character 96 // category for the dictionary characters even when 97 // RuleBasedBreakIteratorBuilder isn't making a distinction. 98 protected void mungeExpressionList(Hashtable<String, Object> expressions) { 99 expressions.put(dictionaryExpression, dictionaryChars); 100 } 101 102 void makeFile(String filename) { 103 super.setAdditionalData(super.toByteArray(categoryFlags)); 104 super.makeFile(filename); 105 } 106} 107