1/*
2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25package xmlkit; // -*- mode: java; indent-tabs-mode: nil -*-
26
27import java.util.*;
28
29/**
30 * A List of Strings each representing a word or token.
31 * This object itself is a CharSequence whose characters consist
32 * of all the tokens, separated by blanks.
33 *
34 * @author jrose
35 */
36public class TokenList extends ArrayList<String> implements CharSequence {
37
38    protected String separator;
39    protected boolean frozen;
40
41    public TokenList() {
42        this.separator = " ";
43    }
44
45    public TokenList(Collection<? extends Object> tokens) {
46        super(tokens.size());
47        this.separator = " ";
48        addTokens(tokens);
49    }
50
51    public TokenList(Collection<? extends Object> tokens, String separator) {
52        super(tokens.size());
53        this.separator = separator;
54        addTokens(tokens);
55    }
56
57    public TokenList(Object[] tokens) {
58        super(tokens.length);
59        this.separator = " ";
60        addTokens(tokens, 0, tokens.length);
61    }
62
63    public TokenList(Object[] tokens, int beg, int end) {
64        super(end - beg);  // capacity
65        this.separator = " ";
66        addTokens(tokens, beg, end);
67    }
68
69    public TokenList(Object[] tokens, int beg, int end, String separator) {
70        super(end - beg);  // capacity
71        this.separator = separator;
72        addTokens(tokens, beg, end);
73    }
74
75    public TokenList(String tokenStr) {
76        this(tokenStr, " ", false);
77    }
78
79    public TokenList(String tokenStr, String separator) {
80        this(tokenStr, separator, true);
81    }
82
83    public TokenList(String tokenStr, String separator, boolean allowNulls) {
84        super(tokenStr.length() / 5);
85        this.separator = separator;
86        addTokens(tokenStr, allowNulls);
87    }
88    static public final TokenList EMPTY;
89
90    static {
91        TokenList tl = new TokenList(new Object[0]);
92        tl.freeze();
93        EMPTY = tl;
94    }
95
96    public void freeze() {
97        if (!frozen) {
98            for (ListIterator<String> i = listIterator(); i.hasNext();) {
99                i.set(i.next().toString());
100            }
101            trimToSize();
102            frozen = true;
103        }
104    }
105
106    public boolean isFrozen() {
107        return frozen;
108    }
109
110    void checkNotFrozen() {
111        if (isFrozen()) {
112            throw new UnsupportedOperationException("cannot modify frozen TokenList");
113        }
114    }
115
116    public String getSeparator() {
117        return separator;
118    }
119
120    public void setSeparator(String separator) {
121        checkNotFrozen();
122        this.separator = separator;
123    }
124
125    /// All normal List mutators must check the frozen bit:
126    public String set(int index, String o) {
127        checkNotFrozen();
128        return super.set(index, o);
129    }
130
131    public boolean add(String o) {
132        checkNotFrozen();
133        return super.add(o);
134    }
135
136    public void add(int index, String o) {
137        checkNotFrozen();
138        super.add(index, o);
139    }
140
141    public boolean addAll(Collection<? extends String> c) {
142        checkNotFrozen();
143        return super.addAll(c);
144    }
145
146    public boolean addAll(int index, Collection<? extends String> c) {
147        checkNotFrozen();
148        return super.addAll(index, c);
149    }
150
151    public boolean remove(Object o) {
152        checkNotFrozen();
153        return super.remove(o);
154    }
155
156    public String remove(int index) {
157        checkNotFrozen();
158        return super.remove(index);
159    }
160
161    public void clear() {
162        checkNotFrozen();
163        super.clear();
164    }
165
166    /** Add a collection of tokens to the list, applying toString to each. */
167    public boolean addTokens(Collection<? extends Object> tokens) {
168        // Note that if this sequence is empty, no tokens are added.
169        // This is different from adding a null string, which is
170        // a single token.
171        boolean added = false;
172        for (Object token : tokens) {
173            add(token.toString());
174            added = true;
175        }
176        return added;
177    }
178
179    public boolean addTokens(Object[] tokens, int beg, int end) {
180        boolean added = false;
181        for (int i = beg; i < end; i++) {
182            add(tokens[i].toString());
183            added = true;
184        }
185        return added;
186    }
187
188    public boolean addTokens(String tokenStr) {
189        return addTokens(tokenStr, false);
190    }
191
192    public boolean addTokens(String tokenStr, boolean allowNulls) {
193        boolean added = false;
194        int pos = 0, limit = tokenStr.length(), sep = limit;
195        while (pos < limit) {
196            sep = tokenStr.indexOf(separator, pos);
197            if (sep < 0) {
198                sep = limit;
199            }
200            if (sep == pos) {
201                if (allowNulls) {
202                    add("");
203                    added = true;
204                }
205                pos += separator.length();
206            } else {
207                add(tokenStr.substring(pos, sep));
208                added = true;
209                pos = sep + separator.length();
210            }
211        }
212        if (allowNulls && sep < limit) {
213            // Input was something like "tok1 tok2 ".
214            add("");
215            added = true;
216        }
217        return added;
218    }
219
220    public boolean addToken(Object token) {
221        return add(token.toString());
222    }
223
224    /** Format the token string, using quotes and escapes.
225     *  Quotes must contain an odd number of 3 or more elements,
226     *  a sequence of begin/end quote pairs, plus a superquote.
227     *  For each token, the first begin/end pair is used for
228     *  which the end quote does not occur in the token.
229     *  If the token contains all end quotes, the last pair
230     *  is used, with all occurrences of the end quote replaced
231     *  by the superquote.  If an end quote is the empty string,
232     *  the separator is used instead.
233     */
234    public String format(String separator, String[] quotes) {
235        return ""; //@@
236    }
237    protected int[] lengths;
238    protected static final int MODC = 0, HINT = 1, BEG0 = 2, END0 = 3;
239
240    // Layout of lengths:
241    //   { modCount, hint, -1==beg[0], end[0]==beg[1], ..., length }
242    // Note that each beg[i]..end[i] span includes a leading separator,
243    // which is not part of the corresponding token.
244    protected final CharSequence getCS(int i) {
245        return (CharSequence) get(i);
246    }
247
248    // Produce (and cache) an table of indexes for each token.
249    protected int[] getLengths() {
250        int[] lengths = this.lengths;
251        ;
252        int sepLength = separator.length();
253        if (lengths == null || lengths[MODC] != modCount) {
254            int size = this.size();
255            lengths = new int[END0 + size + (size == 0 ? 1 : 0)];
256            lengths[MODC] = modCount;
257            int end = -sepLength;  // cancels leading separator
258            lengths[BEG0] = end;
259            for (int i = 0; i < size; i++) {
260                end += sepLength;  // count leading separator
261                end += getCS(i).length();
262                lengths[END0 + i] = end;
263            }
264            this.lengths = lengths;
265        }
266        return lengths;
267    }
268
269    public int length() {
270        int[] lengths = getLengths();
271        return lengths[lengths.length - 1];
272    }
273
274    // Which token does the given index belong to?
275    protected int which(int i) {
276        if (i < 0) {
277            return -1;
278        }
279        int[] lengths = getLengths();
280        for (int hint = lengths[HINT];; hint = 0) {
281            for (int wh = hint; wh < lengths.length - END0; wh++) {
282                int beg = lengths[BEG0 + wh];
283                int end = lengths[END0 + wh];
284                if (i >= beg && i < end) {
285                    lengths[HINT] = wh;
286                    return wh;
287                }
288            }
289            if (hint == 0) {
290                return size();  // end of the line
291            }
292        }
293    }
294
295    public char charAt(int i) {
296        if (i < 0) {
297            return "".charAt(i);
298        }
299        int wh = which(i);
300        int beg = lengths[BEG0 + wh];
301        int j = i - beg;
302        int sepLength = separator.length();
303        if (j < sepLength) {
304            return separator.charAt(j);
305        }
306        return getCS(wh).charAt(j - sepLength);
307    }
308
309    public CharSequence subSequence(int beg, int end) {
310        //System.out.println("i: "+beg+".."+end);
311        if (beg == end) {
312            return "";
313        }
314        if (beg < 0) {
315            charAt(beg);  // raise exception
316        }
317        if (beg > end) {
318            charAt(-1);   // raise exception
319        }
320        int begWh = which(beg);
321        int endWh = which(end);
322        if (endWh == size() || end == lengths[BEG0 + endWh]) {
323            --endWh;
324        }
325        //System.out.println("wh: "+begWh+".."+endWh);
326        int begBase = lengths[BEG0 + begWh];
327        int endBase = lengths[BEG0 + endWh];
328        int sepLength = separator.length();
329        int begFrag = 0;
330        if ((beg - begBase) < sepLength) {
331            begFrag = sepLength - (beg - begBase);
332            beg += begFrag;
333        }
334        int endFrag = 0;
335        if ((end - endBase) < sepLength) {
336            endFrag = (end - endBase);
337            end = endBase;
338            endBase = lengths[BEG0 + --endWh];
339        }
340        if (false) {
341            System.out.print("beg[wbf]end[wbf]");
342            int pr[] = {begWh, begBase, begFrag, beg, endWh, endBase, endFrag, end};
343            for (int k = 0; k < pr.length; k++) {
344                System.out.print((k == 4 ? "   " : " ") + (pr[k]));
345            }
346            System.out.println();
347        }
348        if (begFrag > 0 && (end + endFrag) - begBase <= sepLength) {
349            // Special case:  Slice the separator.
350            beg -= begFrag;
351            end += endFrag;
352            return separator.substring(beg - begBase, end - begBase);
353        }
354        if (begWh == endWh && (begFrag + endFrag) == 0) {
355            // Special case:  Slice a single token.
356            return getCS(begWh).subSequence(beg - begBase - sepLength,
357                    end - endBase - sepLength);
358        }
359        Object[] subTokens = new Object[1 + (endWh - begWh) + 1];
360        int fillp = 0;
361        if (begFrag == sepLength) {
362            // Insert a leading null token to force an initial separator.
363            subTokens[fillp++] = "";
364            begFrag = 0;
365        }
366        for (int wh = begWh; wh <= endWh; wh++) {
367            CharSequence cs = getCS(wh);
368            if (wh == begWh || wh == endWh) {
369                // Slice it.
370                int csBeg = (wh == begWh) ? (beg - begBase) - sepLength : 0;
371                int csEnd = (wh == endWh) ? (end - endBase) - sepLength : cs.length();
372                cs = cs.subSequence(csBeg, csEnd);
373                if (begFrag > 0 && wh == begWh) {
374                    cs = separator.substring(sepLength - begFrag) + cs;
375                }
376                if (endFrag > 0 && wh == endWh) {
377                    cs = cs.toString() + separator.substring(0, endFrag);
378                }
379            }
380            subTokens[fillp++] = cs;
381        }
382        return new TokenList(subTokens, 0, fillp, separator);
383    }
384
385    /** Returns the concatenation of all tokens,
386     *  with intervening separator characters.
387     */
388    public String toString() {
389        StringBuilder buf = new StringBuilder(length());
390        int size = this.size();
391        for (int i = 0; i < size; i++) {
392            if (i > 0) {
393                buf.append(separator);
394            }
395            buf.append(get(i));
396        }
397        return buf.toString();
398    }
399
400    /*---- TESTING CODE ----
401    public static void main(String[] av) {
402    if (av.length == 0)  av = new String[]{"one", "2", "", "four"};
403    TokenList ts = new TokenList();
404    final String SEP = ", ";
405    ts.setSeparator(SEP);
406    for (int i = -1; i < av.length; i++) {
407    if (i >= 0)  ts.addToken(av[i]);
408    {
409    TokenList tsCopy = new TokenList(ts.toString(), SEP);
410    if (!tsCopy.equals(ts)) {
411    tsCopy.setSeparator(")(");
412    System.out.println("!= ("+tsCopy+")");
413    }
414    }
415    {
416    TokenList tsBar = new TokenList(ts, "|");
417    tsBar.add(0, "[");
418    tsBar.add("]");
419    System.out.println(tsBar);
420    }
421    if (false) {
422    int[] ls = ts.getLengths();
423    System.out.println("ts: "+ts);
424    System.out.print("ls: {");
425    for (int j = 0; j < ls.length; j++)  System.out.print(" "+ls[j]);
426    System.out.println(" }");
427    }
428    assert0(ts.size() == i+1);
429    assert0(i < 0 || ts.get(i) == av[i]);
430    String tss = ts.toString();
431    int tslen = tss.length();
432    assert0(ts.length() == tss.length());
433    for (int n = 0; n < tslen; n++) {
434    assert0(ts.charAt(n) == tss.charAt(n));
435    }
436    for (int j = 0; j < tslen; j++) {
437    for (int k = tslen; k >= j; k--) {
438    CharSequence sub = ts.subSequence(j, k);
439    //System.out.println("|"+sub+"|");
440    assert0(sub.toString().equals(tss.substring(j, k)));
441    }
442    }
443    }
444    }
445    static void assert0(boolean z) {
446    if (!z)  throw new RuntimeException("assert failed");
447    }
448    // ---- TESTING CODE ----*/
449}
450