1/*
2 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 *******************************************************************************
28 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
29 * others. All Rights Reserved.                                                *
30 *******************************************************************************
31 */
32package sun.util.locale;
33
34import java.util.ArrayList;
35import java.util.HashMap;
36import java.util.HashSet;
37import java.util.List;
38import java.util.Map;
39import java.util.Set;
40
41public final class InternalLocaleBuilder {
42
    // Key under which the private use value is stored in the extensions map.
    private static final CaseInsensitiveChar PRIVATEUSE_KEY
        = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE);

    // Base locale fields; the empty string means "not set".
    private String language = "";
    private String script = "";
    private String region = "";
    private String variant = "";

    // Extension values keyed by singleton character (compared ignoring case).
    // Allocated lazily. The Unicode locale extension is never stored here;
    // it is kept in uattributes/ukeywords instead.
    private Map<CaseInsensitiveChar, String> extensions;
    // Unicode locale extension attributes. Allocated lazily.
    private Set<CaseInsensitiveString> uattributes;
    // Unicode locale extension keywords, key -> type. Allocated lazily.
    private Map<CaseInsensitiveString, String> ukeywords;
54
55
    /**
     * Creates a builder whose language, script, region and variant are
     * empty and which holds no extensions.
     */
    public InternalLocaleBuilder() {
    }
58
59    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
60        if (LocaleUtils.isEmpty(language)) {
61            this.language = "";
62        } else {
63            if (!LanguageTag.isLanguage(language)) {
64                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
65            }
66            this.language = language;
67        }
68        return this;
69    }
70
71    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
72        if (LocaleUtils.isEmpty(script)) {
73            this.script = "";
74        } else {
75            if (!LanguageTag.isScript(script)) {
76                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
77            }
78            this.script = script;
79        }
80        return this;
81    }
82
83    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
84        if (LocaleUtils.isEmpty(region)) {
85            this.region = "";
86        } else {
87            if (!LanguageTag.isRegion(region)) {
88                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
89            }
90            this.region = region;
91        }
92        return this;
93    }
94
95    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
96        if (LocaleUtils.isEmpty(variant)) {
97            this.variant = "";
98        } else {
99            // normalize separators to "_"
100            String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
101            int errIdx = checkVariants(var, BaseLocale.SEP);
102            if (errIdx != -1) {
103                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
104            }
105            this.variant = var;
106        }
107        return this;
108    }
109
110    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
111        if (!UnicodeLocaleExtension.isAttribute(attribute)) {
112            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
113        }
114        // Use case insensitive string to prevent duplication
115        if (uattributes == null) {
116            uattributes = new HashSet<>(4);
117        }
118        uattributes.add(new CaseInsensitiveString(attribute));
119        return this;
120    }
121
122    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
123        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
124            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
125        }
126        if (uattributes != null) {
127            uattributes.remove(new CaseInsensitiveString(attribute));
128        }
129        return this;
130    }
131
132    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
133        if (!UnicodeLocaleExtension.isKey(key)) {
134            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
135        }
136
137        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
138        if (type == null) {
139            if (ukeywords != null) {
140                // null type is used for remove the key
141                ukeywords.remove(cikey);
142            }
143        } else {
144            if (type.length() != 0) {
145                // normalize separator to "-"
146                String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
147                // validate
148                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
149                while (!itr.isDone()) {
150                    String s = itr.current();
151                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
152                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: "
153                                                        + type,
154                                                        itr.currentStart());
155                    }
156                    itr.next();
157                }
158            }
159            if (ukeywords == null) {
160                ukeywords = new HashMap<>(4);
161            }
162            ukeywords.put(cikey, type);
163        }
164        return this;
165    }
166
167    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
168        // validate key
169        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
170        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
171            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
172        }
173
174        boolean remove = LocaleUtils.isEmpty(value);
175        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);
176
177        if (remove) {
178            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
179                // clear entire Unicode locale extension
180                if (uattributes != null) {
181                    uattributes.clear();
182                }
183                if (ukeywords != null) {
184                    ukeywords.clear();
185                }
186            } else {
187                if (extensions != null && extensions.containsKey(key)) {
188                    extensions.remove(key);
189                }
190            }
191        } else {
192            // validate value
193            String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
194            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
195            while (!itr.isDone()) {
196                String s = itr.current();
197                boolean validSubtag;
198                if (isBcpPrivateuse) {
199                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
200                } else {
201                    validSubtag = LanguageTag.isExtensionSubtag(s);
202                }
203                if (!validSubtag) {
204                    throw new LocaleSyntaxException("Ill-formed extension value: " + s,
205                                                    itr.currentStart());
206                }
207                itr.next();
208            }
209
210            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
211                setUnicodeLocaleExtension(val);
212            } else {
213                if (extensions == null) {
214                    extensions = new HashMap<>(4);
215                }
216                extensions.put(key, val);
217            }
218        }
219        return this;
220    }
221
222    /*
223     * Set extension/private subtags in a single string representation
224     */
225    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
226        if (LocaleUtils.isEmpty(subtags)) {
227            clearExtensions();
228            return this;
229        }
230        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
231        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
232
233        List<String> extensions = null;
234        String privateuse = null;
235
236        int parsed = 0;
237        int start;
238
239        // Make a list of extension subtags
240        while (!itr.isDone()) {
241            String s = itr.current();
242            if (LanguageTag.isExtensionSingleton(s)) {
243                start = itr.currentStart();
244                String singleton = s;
245                StringBuilder sb = new StringBuilder(singleton);
246
247                itr.next();
248                while (!itr.isDone()) {
249                    s = itr.current();
250                    if (LanguageTag.isExtensionSubtag(s)) {
251                        sb.append(LanguageTag.SEP).append(s);
252                        parsed = itr.currentEnd();
253                    } else {
254                        break;
255                    }
256                    itr.next();
257                }
258
259                if (parsed < start) {
260                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'",
261                                                    start);
262                }
263
264                if (extensions == null) {
265                    extensions = new ArrayList<>(4);
266                }
267                extensions.add(sb.toString());
268            } else {
269                break;
270            }
271        }
272        if (!itr.isDone()) {
273            String s = itr.current();
274            if (LanguageTag.isPrivateusePrefix(s)) {
275                start = itr.currentStart();
276                StringBuilder sb = new StringBuilder(s);
277
278                itr.next();
279                while (!itr.isDone()) {
280                    s = itr.current();
281                    if (!LanguageTag.isPrivateuseSubtag(s)) {
282                        break;
283                    }
284                    sb.append(LanguageTag.SEP).append(s);
285                    parsed = itr.currentEnd();
286
287                    itr.next();
288                }
289                if (parsed <= start) {
290                    throw new LocaleSyntaxException("Incomplete privateuse:"
291                                                    + subtags.substring(start),
292                                                    start);
293                } else {
294                    privateuse = sb.toString();
295                }
296            }
297        }
298
299        if (!itr.isDone()) {
300            throw new LocaleSyntaxException("Ill-formed extension subtags:"
301                                            + subtags.substring(itr.currentStart()),
302                                            itr.currentStart());
303        }
304
305        return setExtensions(extensions, privateuse);
306    }
307
308    /*
309     * Set a list of BCP47 extensions and private use subtags
310     * BCP47 extensions are already validated and well-formed, but may contain duplicates
311     */
312    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
313        clearExtensions();
314
315        if (!LocaleUtils.isEmpty(bcpExtensions)) {
316            Set<CaseInsensitiveChar> done = new HashSet<>(bcpExtensions.size());
317            for (String bcpExt : bcpExtensions) {
318                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt);
319                // ignore duplicates
320                if (!done.contains(key)) {
321                    // each extension string contains singleton, e.g. "a-abc-def"
322                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
323                        setUnicodeLocaleExtension(bcpExt.substring(2));
324                    } else {
325                        if (extensions == null) {
326                            extensions = new HashMap<>(4);
327                        }
328                        extensions.put(key, bcpExt.substring(2));
329                    }
330                }
331                done.add(key);
332            }
333        }
334        if (privateuse != null && privateuse.length() > 0) {
335            // privateuse string contains prefix, e.g. "x-abc-def"
336            if (extensions == null) {
337                extensions = new HashMap<>(1);
338            }
339            extensions.put(new CaseInsensitiveChar(privateuse), privateuse.substring(2));
340        }
341
342        return this;
343    }
344
345    /*
346     * Reset Builder's internal state with the given language tag
347     */
348    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
349        clear();
350        if (!langtag.getExtlangs().isEmpty()) {
351            language = langtag.getExtlangs().get(0);
352        } else {
353            String lang = langtag.getLanguage();
354            if (!lang.equals(LanguageTag.UNDETERMINED)) {
355                language = lang;
356            }
357        }
358        script = langtag.getScript();
359        region = langtag.getRegion();
360
361        List<String> bcpVariants = langtag.getVariants();
362        if (!bcpVariants.isEmpty()) {
363            StringBuilder var = new StringBuilder(bcpVariants.get(0));
364            int size = bcpVariants.size();
365            for (int i = 1; i < size; i++) {
366                var.append(BaseLocale.SEP).append(bcpVariants.get(i));
367            }
368            variant = var.toString();
369        }
370
371        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());
372
373        return this;
374    }
375
376    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions localeExtensions) throws LocaleSyntaxException {
377        String language = base.getLanguage();
378        String script = base.getScript();
379        String region = base.getRegion();
380        String variant = base.getVariant();
381
382        // Special backward compatibility support
383
384        // Exception 1 - ja_JP_JP
385        if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
386            // When locale ja_JP_JP is created, ca-japanese is always there.
387            // The builder ignores the variant "JP"
388            assert("japanese".equals(localeExtensions.getUnicodeLocaleType("ca")));
389            variant = "";
390        }
391        // Exception 2 - th_TH_TH
392        else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
393            // When locale th_TH_TH is created, nu-thai is always there.
394            // The builder ignores the variant "TH"
395            assert("thai".equals(localeExtensions.getUnicodeLocaleType("nu")));
396            variant = "";
397        }
398        // Exception 3 - no_NO_NY
399        else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
400            // no_NO_NY is a valid locale and used by Java 6 or older versions.
401            // The build ignores the variant "NY" and change the language to "nn".
402            language = "nn";
403            variant = "";
404        }
405
406        // Validate base locale fields before updating internal state.
407        // LocaleExtensions always store validated/canonicalized values,
408        // so no checks are necessary.
409        if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
410            throw new LocaleSyntaxException("Ill-formed language: " + language);
411        }
412
413        if (script.length() > 0 && !LanguageTag.isScript(script)) {
414            throw new LocaleSyntaxException("Ill-formed script: " + script);
415        }
416
417        if (region.length() > 0 && !LanguageTag.isRegion(region)) {
418            throw new LocaleSyntaxException("Ill-formed region: " + region);
419        }
420
421        if (variant.length() > 0) {
422            int errIdx = checkVariants(variant, BaseLocale.SEP);
423            if (errIdx != -1) {
424                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
425            }
426        }
427
428        // The input locale is validated at this point.
429        // Now, updating builder's internal fields.
430        this.language = language;
431        this.script = script;
432        this.region = region;
433        this.variant = variant;
434        clearExtensions();
435
436        Set<Character> extKeys = (localeExtensions == null) ? null : localeExtensions.getKeys();
437        if (extKeys != null) {
438            // map localeExtensions back to builder's internal format
439            for (Character key : extKeys) {
440                Extension e = localeExtensions.getExtension(key);
441                if (e instanceof UnicodeLocaleExtension) {
442                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
443                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
444                        if (uattributes == null) {
445                            uattributes = new HashSet<>(4);
446                        }
447                        uattributes.add(new CaseInsensitiveString(uatr));
448                    }
449                    for (String ukey : ue.getUnicodeLocaleKeys()) {
450                        if (ukeywords == null) {
451                            ukeywords = new HashMap<>(4);
452                        }
453                        ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
454                    }
455                } else {
456                    if (extensions == null) {
457                        extensions = new HashMap<>(4);
458                    }
459                    extensions.put(new CaseInsensitiveChar(key), e.getValue());
460                }
461            }
462        }
463        return this;
464    }
465
466    public InternalLocaleBuilder clear() {
467        language = "";
468        script = "";
469        region = "";
470        variant = "";
471        clearExtensions();
472        return this;
473    }
474
475    public InternalLocaleBuilder clearExtensions() {
476        if (extensions != null) {
477            extensions.clear();
478        }
479        if (uattributes != null) {
480            uattributes.clear();
481        }
482        if (ukeywords != null) {
483            ukeywords.clear();
484        }
485        return this;
486    }
487
488    public BaseLocale getBaseLocale() {
489        String language = this.language;
490        String script = this.script;
491        String region = this.region;
492        String variant = this.variant;
493
494        // Special private use subtag sequence identified by "lvariant" will be
495        // interpreted as Java variant.
496        if (extensions != null) {
497            String privuse = extensions.get(PRIVATEUSE_KEY);
498            if (privuse != null) {
499                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
500                boolean sawPrefix = false;
501                int privVarStart = -1;
502                while (!itr.isDone()) {
503                    if (sawPrefix) {
504                        privVarStart = itr.currentStart();
505                        break;
506                    }
507                    if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
508                        sawPrefix = true;
509                    }
510                    itr.next();
511                }
512                if (privVarStart != -1) {
513                    StringBuilder sb = new StringBuilder(variant);
514                    if (sb.length() != 0) {
515                        sb.append(BaseLocale.SEP);
516                    }
517                    sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP,
518                                                                         BaseLocale.SEP));
519                    variant = sb.toString();
520                }
521            }
522        }
523
524        return BaseLocale.getInstance(language, script, region, variant);
525    }
526
527    public LocaleExtensions getLocaleExtensions() {
528        if (LocaleUtils.isEmpty(extensions) && LocaleUtils.isEmpty(uattributes)
529            && LocaleUtils.isEmpty(ukeywords)) {
530            return null;
531        }
532
533        LocaleExtensions lext = new LocaleExtensions(extensions, uattributes, ukeywords);
534        return lext.isEmpty() ? null : lext;
535    }
536
537    /*
538     * Remove special private use subtag sequence identified by "lvariant"
539     * and return the rest. Only used by LocaleExtensions
540     */
541    static String removePrivateuseVariant(String privuseVal) {
542        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);
543
544        // Note: privateuse value "abc-lvariant" is unchanged
545        // because no subtags after "lvariant".
546
547        int prefixStart = -1;
548        boolean sawPrivuseVar = false;
549        while (!itr.isDone()) {
550            if (prefixStart != -1) {
551                // Note: privateuse value "abc-lvariant" is unchanged
552                // because no subtags after "lvariant".
553                sawPrivuseVar = true;
554                break;
555            }
556            if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
557                prefixStart = itr.currentStart();
558            }
559            itr.next();
560        }
561        if (!sawPrivuseVar) {
562            return privuseVal;
563        }
564
565        assert(prefixStart == 0 || prefixStart > 1);
566        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1);
567    }
568
569    /*
570     * Check if the given variant subtags separated by the given
571     * separator(s) are valid
572     */
573    private int checkVariants(String variants, String sep) {
574        StringTokenIterator itr = new StringTokenIterator(variants, sep);
575        while (!itr.isDone()) {
576            String s = itr.current();
577            if (!LanguageTag.isVariant(s)) {
578                return itr.currentStart();
579            }
580            itr.next();
581        }
582        return -1;
583    }
584
    /*
     * Parses Unicode locale extension subtags and replaces the builder's
     * current Unicode locale attributes and keywords with the result.
     * Duplicated attributes/keywords are ignored (the first one wins).
     * The input must be valid extension subtags (excluding the singleton).
     *
     * The input is read in two phases: leading attribute subtags first,
     * then keywords, where each keyword is a key followed by zero or more
     * type subtags. A keyword without type subtags gets the type "".
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes: consume subtags until the first non-attribute
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (uattributes == null) {
                uattributes = new HashSet<>(4);
            }
            uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        // key is the keyword currently being collected, or null when there
        // is none (before the first key, or while skipping a duplicate).
        CaseInsensitiveString key = null;
        String type;
        // typeStart/typeEnd are offsets into subtags spanning all type
        // subtags seen for the current key; -1 means no type subtag yet.
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);

                    // reset keyword info; a key that is already stored is a
                    // duplicate and is dropped by setting key to null
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    // type subtag of the current key: extend the type span
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (ukeywords != null && ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }
            // A non-key subtag seen while key is null falls through both
            // branches above and is skipped.

            if (!itr.hasNext()) {
                // End of input: the pending keyword has not been stored yet
                // because no following key triggered the emit above.
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }
665
666    static final class CaseInsensitiveString {
667        private final String str, lowerStr;
668
669        CaseInsensitiveString(String s) {
670            str = s;
671            lowerStr = LocaleUtils.toLowerString(s);
672        }
673
674        public String value() {
675            return str;
676        }
677
678        @Override
679        public int hashCode() {
680            return lowerStr.hashCode();
681        }
682
683        @Override
684        public boolean equals(Object obj) {
685            if (this == obj) {
686                return true;
687            }
688            if (!(obj instanceof CaseInsensitiveString)) {
689                return false;
690            }
691            return lowerStr.equals(((CaseInsensitiveString)obj).lowerStr);
692        }
693    }
694
695    static final class CaseInsensitiveChar {
696        private final char ch, lowerCh;
697
698        /**
699         * Constructs a CaseInsensitiveChar with the first char of the
700         * given s.
701         */
702        private CaseInsensitiveChar(String s) {
703            this(s.charAt(0));
704        }
705
706        CaseInsensitiveChar(char c) {
707            ch = c;
708            lowerCh = LocaleUtils.toLower(ch);
709        }
710
711        public char value() {
712            return ch;
713        }
714
715        @Override
716        public int hashCode() {
717            return lowerCh;
718        }
719
720        @Override
721        public boolean equals(Object obj) {
722            if (this == obj) {
723                return true;
724            }
725            if (!(obj instanceof CaseInsensitiveChar)) {
726                return false;
727            }
728            return lowerCh == ((CaseInsensitiveChar)obj).lowerCh;
729        }
730    }
731}
732