1/*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.activation.registries;
27
/**
 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
 * Useful for parsing MIME content types and mailcap entries.
 *
 * <p>Typical use: construct the tokenizer, then call {@link #nextToken()}
 * repeatedly until it returns {@link #EOI_TOKEN}, reading the text of each
 * token with {@link #getCurrentTokenValue()}.
 *
 * <p>Instances hold mutable scanning state and perform no synchronization.
 */
public class MailcapTokenizer {

    /** A character that is neither part of a string token nor a known separator. */
    public static final int UNKNOWN_TOKEN = 0;
    /** The state of a freshly constructed tokenizer, before the first {@link #nextToken()}. */
    public static final int START_TOKEN = 1;
    /** A run of string characters, or an auto-quoted value. */
    public static final int STRING_TOKEN = 2;
    /** End of input. */
    public static final int EOI_TOKEN = 5;
    /** The '/' separator; the token code is the character itself. */
    public static final int SLASH_TOKEN = '/';
    /** The ';' separator; the token code is the character itself. */
    public static final int SEMICOLON_TOKEN = ';';
    /** The '=' separator; the token code is the character itself. */
    public static final int EQUALS_TOKEN = '=';

    /** Character that terminates an auto-quoted token unless it is backslash-escaped. */
    private static final char AUTOQUOTE_CHAR = ';';

    private final String data;
    private final int dataLength;
    private int dataIndex;
    private int currentToken;
    private String currentTokenValue;
    private boolean isAutoquoting;

    /**
     *  Constructor.
     *
     *  The tokenizer starts in the {@link #START_TOKEN} state with an empty
     *  token value and with auto-quoting switched off.
     *
     *  @param  inputString the string to tokenize
     *  @throws NullPointerException if {@code inputString} is {@code null}
     */
    public MailcapTokenizer(String inputString) {
        data = inputString;
        dataLength = inputString.length();
        dataIndex = 0;

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
    }

    /**
     *  Set whether auto-quoting is on or off.
     *
     *  Auto-quoting means that all characters from the first
     *  non-whitespace character up to the first unescaped ';' or the end
     *  of input (minus any unescaped whitespace immediately preceding it)
     *  are considered a single string token. A backslash escapes the
     *  character that follows it, so "\;" does not end the token. A
     *  leading ';' or '=' is still returned as its own token.
     *
     *  This is required for handling command strings in a mailcap entry.
     *
     *  @param  value {@code true} to turn auto-quoting on, {@code false} to turn it off
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     *  Retrieve current token.
     *
     *  @return    The current token code (one of the {@code *_TOKEN} constants)
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     *  Get a String that describes the given token.
     *
     *  @param  token a token code
     *  @return    a short name for the token, or "really unknown" if the
     *             code is not one of the {@code *_TOKEN} constants
     */
    public static String nameForToken(int token) {
        switch (token) {
            case UNKNOWN_TOKEN:
                return "unknown";
            case START_TOKEN:
                return "start";
            case STRING_TOKEN:
                return "string";
            case EOI_TOKEN:
                return "EOI";
            case SLASH_TOKEN:
                return "'/'";
            case SEMICOLON_TOKEN:
                return "';'";
            case EQUALS_TOKEN:
                return "'='";
            default:
                return "really unknown";
        }
    }

    /**
     *  Retrieve current token value.
     *
     *  @return    A String containing the current token value; the empty
     *             string before the first call to {@link #nextToken()} and
     *             {@code null} once the end of input has been reached
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     *  Process the next token.
     *
     *  Leading white space is skipped. What follows is classified according
     *  to the current auto-quoting mode, and the result is also available
     *  afterwards through {@link #getCurrentToken()} and
     *  {@link #getCurrentTokenValue()}.
     *
     *  @return    the next token
     */
    public int nextToken() {
        //  skip white space
        while ((dataIndex < dataLength) &&
                isWhiteSpaceChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        //  examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            //  '/' is deliberately not a separator here: commands contain paths
            if (c == ';' || c == '=') {
                processSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if ((c == '/') || (c == ';') || (c == '=')) {
            processSingleCharToken(c, c);
        } else {
            processSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /**
     *  Consume the single character at the current position and record it
     *  as a token of the given type.
     */
    private void processSingleCharToken(int token, char c) {
        currentToken = token;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /**
     *  Consume the longest run of string-token characters starting at the
     *  current position and record it as a string token.
     */
    private void processStringToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  skip to 1st non string token character
        while ((dataIndex < dataLength) &&
                isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     *  Consume everything up to (but not including) the first unescaped
     *  terminator character, or up to the end of input, and record it as a
     *  string token. Unescaped white space immediately before the
     *  terminator is left out of the token value, and escape sequences in
     *  the value are resolved.
     */
    private void processAutoquoteToken() {
        //  capture the initial index
        int initialIndex = dataIndex;

        //  end of the token once unescaped trailing white space is dropped
        int trimmedEnd = initialIndex;

        //  now skip to the 1st non-escaped autoquote termination character
        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if (c == AUTOQUOTE_CHAR) {
                break;
            }

            if ((c == '\\') && (dataIndex + 1 < dataLength)) {
                //  an escaped character (even the terminator or white
                //  space) always belongs to the token
                dataIndex += 2;
                trimmedEnd = dataIndex;
            } else {
                ++dataIndex;
                if (!isWhiteSpaceChar(c)) {
                    trimmedEnd = dataIndex;
                }
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, trimmedEnd));
    }

    /**
     *  Tell whether the character is one of the separator characters that
     *  can never be part of a string token.
     */
    private static boolean isSpecialChar(char c) {
        switch (c) {
            case '(':
            case ')':
            case '<':
            case '>':
            case '@':
            case ',':
            case ';':
            case ':':
            case '\\':
            case '"':
            case '/':
            case '[':
            case ']':
            case '?':
            case '=':
                return true;
            default:
                return false;
        }
    }

    private static boolean isControlChar(char c) {
        return Character.isISOControl(c);
    }

    private static boolean isWhiteSpaceChar(char c) {
        return Character.isWhitespace(c);
    }

    /**
     *  A string token character is anything that is not a special
     *  character, a control character or white space.
     */
    private static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    }

    /**
     *  Resolve backslash escapes: each backslash is removed and the
     *  character after it is kept literally. A backslash that is the very
     *  last character has nothing to escape and is kept as is.
     */
    private static String fixEscapeSequences(String inputString) {
        int inputLength = inputString.length();
        StringBuilder buffer = new StringBuilder(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if ((currentChar == '\\') && (i < inputLength - 1)) {
                //  drop the backslash and copy the escaped character
                ++i;
                buffer.append(inputString.charAt(i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}
322