1/* 2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25package xmlkit; // -*- mode: java; indent-tabs-mode: nil -*- 26 27import java.util.*; 28 29/** 30 * A List of Strings each representing a word or token. 31 * This object itself is a CharSequence whose characters consist 32 * of all the tokens, separated by blanks. 33 * 34 * @author jrose 35 */ 36public class TokenList extends ArrayList<String> implements CharSequence { 37 38 protected String separator; 39 protected boolean frozen; 40 41 public TokenList() { 42 this.separator = " "; 43 } 44 45 public TokenList(Collection<? extends Object> tokens) { 46 super(tokens.size()); 47 this.separator = " "; 48 addTokens(tokens); 49 } 50 51 public TokenList(Collection<? extends Object> tokens, String separator) { 52 super(tokens.size()); 53 this.separator = separator; 54 addTokens(tokens); 55 } 56 57 public TokenList(Object[] tokens) { 58 super(tokens.length); 59 this.separator = " "; 60 addTokens(tokens, 0, tokens.length); 61 } 62 63 public TokenList(Object[] tokens, int beg, int end) { 64 super(end - beg); // capacity 65 this.separator = " "; 66 addTokens(tokens, beg, end); 67 } 68 69 public TokenList(Object[] tokens, int beg, int end, String separator) { 70 super(end - beg); // capacity 71 this.separator = separator; 72 addTokens(tokens, beg, end); 73 } 74 75 public TokenList(String tokenStr) { 76 this(tokenStr, " ", false); 77 } 78 79 public TokenList(String tokenStr, String separator) { 80 this(tokenStr, separator, true); 81 } 82 83 public TokenList(String tokenStr, String separator, boolean allowNulls) { 84 super(tokenStr.length() / 5); 85 this.separator = separator; 86 addTokens(tokenStr, allowNulls); 87 } 88 static public final TokenList EMPTY; 89 90 static { 91 TokenList tl = new TokenList(new Object[0]); 92 tl.freeze(); 93 EMPTY = tl; 94 } 95 96 public void freeze() { 97 if (!frozen) { 98 for (ListIterator<String> i = listIterator(); i.hasNext();) { 99 i.set(i.next().toString()); 100 } 101 trimToSize(); 102 frozen = true; 103 } 104 } 105 106 public boolean isFrozen() { 107 return frozen; 108 } 109 110 void checkNotFrozen() { 111 if (isFrozen()) { 112 throw new UnsupportedOperationException("cannot modify frozen TokenList"); 113 } 114 } 115 116 public String getSeparator() { 117 return separator; 118 } 119 120 public void setSeparator(String separator) { 121 checkNotFrozen(); 122 this.separator = separator; 123 } 124 125 /// All normal List mutators must check the frozen bit: 126 public String set(int index, String o) { 127 checkNotFrozen(); 128 return super.set(index, o); 129 } 130 131 public boolean add(String o) { 132 checkNotFrozen(); 133 return super.add(o); 134 } 135 136 public void add(int index, String o) { 137 checkNotFrozen(); 138 super.add(index, o); 139 } 140 141 public boolean addAll(Collection<? extends String> c) { 142 checkNotFrozen(); 143 return super.addAll(c); 144 } 145 146 public boolean addAll(int index, Collection<? extends String> c) { 147 checkNotFrozen(); 148 return super.addAll(index, c); 149 } 150 151 public boolean remove(Object o) { 152 checkNotFrozen(); 153 return super.remove(o); 154 } 155 156 public String remove(int index) { 157 checkNotFrozen(); 158 return super.remove(index); 159 } 160 161 public void clear() { 162 checkNotFrozen(); 163 super.clear(); 164 } 165 166 /** Add a collection of tokens to the list, applying toString to each. */ 167 public boolean addTokens(Collection<? extends Object> tokens) { 168 // Note that if this sequence is empty, no tokens are added. 169 // This is different from adding a null string, which is 170 // a single token. 171 boolean added = false; 172 for (Object token : tokens) { 173 add(token.toString()); 174 added = true; 175 } 176 return added; 177 } 178 179 public boolean addTokens(Object[] tokens, int beg, int end) { 180 boolean added = false; 181 for (int i = beg; i < end; i++) { 182 add(tokens[i].toString()); 183 added = true; 184 } 185 return added; 186 } 187 188 public boolean addTokens(String tokenStr) { 189 return addTokens(tokenStr, false); 190 } 191 192 public boolean addTokens(String tokenStr, boolean allowNulls) { 193 boolean added = false; 194 int pos = 0, limit = tokenStr.length(), sep = limit; 195 while (pos < limit) { 196 sep = tokenStr.indexOf(separator, pos); 197 if (sep < 0) { 198 sep = limit; 199 } 200 if (sep == pos) { 201 if (allowNulls) { 202 add(""); 203 added = true; 204 } 205 pos += separator.length(); 206 } else { 207 add(tokenStr.substring(pos, sep)); 208 added = true; 209 pos = sep + separator.length(); 210 } 211 } 212 if (allowNulls && sep < limit) { 213 // Input was something like "tok1 tok2 ". 214 add(""); 215 added = true; 216 } 217 return added; 218 } 219 220 public boolean addToken(Object token) { 221 return add(token.toString()); 222 } 223 224 /** Format the token string, using quotes and escapes. 225 * Quotes must contain an odd number of 3 or more elements, 226 * a sequence of begin/end quote pairs, plus a superquote. 227 * For each token, the first begin/end pair is used for 228 * which the end quote does not occur in the token. 229 * If the token contains all end quotes, the last pair 230 * is used, with all occurrences of the end quote replaced 231 * by the superquote. If an end quote is the empty string, 232 * the separator is used instead. 233 */ 234 public String format(String separator, String[] quotes) { 235 return ""; //@@ 236 } 237 protected int[] lengths; 238 protected static final int MODC = 0, HINT = 1, BEG0 = 2, END0 = 3; 239 240 // Layout of lengths: 241 // { modCount, hint, -1==beg[0], end[0]==beg[1], ..., length } 242 // Note that each beg[i]..end[i] span includes a leading separator, 243 // which is not part of the corresponding token. 244 protected final CharSequence getCS(int i) { 245 return (CharSequence) get(i); 246 } 247 248 // Produce (and cache) an table of indexes for each token. 249 protected int[] getLengths() { 250 int[] lengths = this.lengths; 251 ; 252 int sepLength = separator.length(); 253 if (lengths == null || lengths[MODC] != modCount) { 254 int size = this.size(); 255 lengths = new int[END0 + size + (size == 0 ? 1 : 0)]; 256 lengths[MODC] = modCount; 257 int end = -sepLength; // cancels leading separator 258 lengths[BEG0] = end; 259 for (int i = 0; i < size; i++) { 260 end += sepLength; // count leading separator 261 end += getCS(i).length(); 262 lengths[END0 + i] = end; 263 } 264 this.lengths = lengths; 265 } 266 return lengths; 267 } 268 269 public int length() { 270 int[] lengths = getLengths(); 271 return lengths[lengths.length - 1]; 272 } 273 274 // Which token does the given index belong to? 275 protected int which(int i) { 276 if (i < 0) { 277 return -1; 278 } 279 int[] lengths = getLengths(); 280 for (int hint = lengths[HINT];; hint = 0) { 281 for (int wh = hint; wh < lengths.length - END0; wh++) { 282 int beg = lengths[BEG0 + wh]; 283 int end = lengths[END0 + wh]; 284 if (i >= beg && i < end) { 285 lengths[HINT] = wh; 286 return wh; 287 } 288 } 289 if (hint == 0) { 290 return size(); // end of the line 291 } 292 } 293 } 294 295 public char charAt(int i) { 296 if (i < 0) { 297 return "".charAt(i); 298 } 299 int wh = which(i); 300 int beg = lengths[BEG0 + wh]; 301 int j = i - beg; 302 int sepLength = separator.length(); 303 if (j < sepLength) { 304 return separator.charAt(j); 305 } 306 return getCS(wh).charAt(j - sepLength); 307 } 308 309 public CharSequence subSequence(int beg, int end) { 310 //System.out.println("i: "+beg+".."+end); 311 if (beg == end) { 312 return ""; 313 } 314 if (beg < 0) { 315 charAt(beg); // raise exception 316 } 317 if (beg > end) { 318 charAt(-1); // raise exception 319 } 320 int begWh = which(beg); 321 int endWh = which(end); 322 if (endWh == size() || end == lengths[BEG0 + endWh]) { 323 --endWh; 324 } 325 //System.out.println("wh: "+begWh+".."+endWh); 326 int begBase = lengths[BEG0 + begWh]; 327 int endBase = lengths[BEG0 + endWh]; 328 int sepLength = separator.length(); 329 int begFrag = 0; 330 if ((beg - begBase) < sepLength) { 331 begFrag = sepLength - (beg - begBase); 332 beg += begFrag; 333 } 334 int endFrag = 0; 335 if ((end - endBase) < sepLength) { 336 endFrag = (end - endBase); 337 end = endBase; 338 endBase = lengths[BEG0 + --endWh]; 339 } 340 if (false) { 341 System.out.print("beg[wbf]end[wbf]"); 342 int pr[] = {begWh, begBase, begFrag, beg, endWh, endBase, endFrag, end}; 343 for (int k = 0; k < pr.length; k++) { 344 System.out.print((k == 4 ? " " : " ") + (pr[k])); 345 } 346 System.out.println(); 347 } 348 if (begFrag > 0 && (end + endFrag) - begBase <= sepLength) { 349 // Special case: Slice the separator. 350 beg -= begFrag; 351 end += endFrag; 352 return separator.substring(beg - begBase, end - begBase); 353 } 354 if (begWh == endWh && (begFrag + endFrag) == 0) { 355 // Special case: Slice a single token. 356 return getCS(begWh).subSequence(beg - begBase - sepLength, 357 end - endBase - sepLength); 358 } 359 Object[] subTokens = new Object[1 + (endWh - begWh) + 1]; 360 int fillp = 0; 361 if (begFrag == sepLength) { 362 // Insert a leading null token to force an initial separator. 363 subTokens[fillp++] = ""; 364 begFrag = 0; 365 } 366 for (int wh = begWh; wh <= endWh; wh++) { 367 CharSequence cs = getCS(wh); 368 if (wh == begWh || wh == endWh) { 369 // Slice it. 370 int csBeg = (wh == begWh) ? (beg - begBase) - sepLength : 0; 371 int csEnd = (wh == endWh) ? (end - endBase) - sepLength : cs.length(); 372 cs = cs.subSequence(csBeg, csEnd); 373 if (begFrag > 0 && wh == begWh) { 374 cs = separator.substring(sepLength - begFrag) + cs; 375 } 376 if (endFrag > 0 && wh == endWh) { 377 cs = cs.toString() + separator.substring(0, endFrag); 378 } 379 } 380 subTokens[fillp++] = cs; 381 } 382 return new TokenList(subTokens, 0, fillp, separator); 383 } 384 385 /** Returns the concatenation of all tokens, 386 * with intervening separator characters. 387 */ 388 public String toString() { 389 StringBuilder buf = new StringBuilder(length()); 390 int size = this.size(); 391 for (int i = 0; i < size; i++) { 392 if (i > 0) { 393 buf.append(separator); 394 } 395 buf.append(get(i)); 396 } 397 return buf.toString(); 398 } 399 400 /*---- TESTING CODE ---- 401 public static void main(String[] av) { 402 if (av.length == 0) av = new String[]{"one", "2", "", "four"}; 403 TokenList ts = new TokenList(); 404 final String SEP = ", "; 405 ts.setSeparator(SEP); 406 for (int i = -1; i < av.length; i++) { 407 if (i >= 0) ts.addToken(av[i]); 408 { 409 TokenList tsCopy = new TokenList(ts.toString(), SEP); 410 if (!tsCopy.equals(ts)) { 411 tsCopy.setSeparator(")("); 412 System.out.println("!= ("+tsCopy+")"); 413 } 414 } 415 { 416 TokenList tsBar = new TokenList(ts, "|"); 417 tsBar.add(0, "["); 418 tsBar.add("]"); 419 System.out.println(tsBar); 420 } 421 if (false) { 422 int[] ls = ts.getLengths(); 423 System.out.println("ts: "+ts); 424 System.out.print("ls: {"); 425 for (int j = 0; j < ls.length; j++) System.out.print(" "+ls[j]); 426 System.out.println(" }"); 427 } 428 assert0(ts.size() == i+1); 429 assert0(i < 0 || ts.get(i) == av[i]); 430 String tss = ts.toString(); 431 int tslen = tss.length(); 432 assert0(ts.length() == tss.length()); 433 for (int n = 0; n < tslen; n++) { 434 assert0(ts.charAt(n) == tss.charAt(n)); 435 } 436 for (int j = 0; j < tslen; j++) { 437 for (int k = tslen; k >= j; k--) { 438 CharSequence sub = ts.subSequence(j, k); 439 //System.out.println("|"+sub+"|"); 440 assert0(sub.toString().equals(tss.substring(j, k))); 441 } 442 } 443 } 444 } 445 static void assert0(boolean z) { 446 if (!z) throw new RuntimeException("assert failed"); 447 } 448 // ---- TESTING CODE ----*/ 449} 450