1/*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27*******************************************************************************
28*   Copyright (C) 2001-2010, International Business Machines
29*   Corporation and others.  All Rights Reserved.
30*******************************************************************************
31*/
32/* Written by Simon Montagu, Matitiahu Allouche
33 * (ported from C code written by Markus W. Scherer)
34 */
35
36package sun.text.bidi;
37
38import sun.text.normalizer.UCharacter;
39import sun.text.normalizer.UTF16;
40
41final class BidiWriter {
42
43    /** Bidi control code points */
44    static final char LRM_CHAR = 0x200e;
45    static final char RLM_CHAR = 0x200f;
46    static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
47                                  1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
48
49    private static boolean IsCombining(int type) {
50        return ((1<<type &
51                (1<<UCharacter.NON_SPACING_MARK |
52                 1<<UCharacter.COMBINING_SPACING_MARK |
53                 1<<UCharacter.ENCLOSING_MARK)) != 0);
54    }
55
56    /*
57     * When we have OUTPUT_REVERSE set on writeReordered(), then we
58     * semantically write RTL runs in reverse and later reverse them again.
59     * Instead, we actually write them in forward order to begin with.
60     * However, if the RTL run was to be mirrored, we need to mirror here now
61     * since the implicit second reversal must not do it.
62     * It looks strange to do mirroring in LTR output, but it is only because
63     * we are writing RTL output in reverse.
64     */
65    private static String doWriteForward(String src, int options) {
66        /* optimize for several combinations of options */
67        switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) {
68        case 0: {
69            /* simply return the LTR run */
70            return src;
71        }
72        case BidiBase.DO_MIRRORING: {
73            StringBuffer dest = new StringBuffer(src.length());
74
75            /* do mirroring */
76            int i=0;
77            int c;
78
79            do {
80                c = UTF16.charAt(src, i);
81                i += UTF16.getCharCount(c);
82                UTF16.append(dest, UCharacter.getMirror(c));
83            } while(i < src.length());
84            return dest.toString();
85        }
86        case BidiBase.REMOVE_BIDI_CONTROLS: {
87            StringBuilder dest = new StringBuilder(src.length());
88
89            /* copy the LTR run and remove any Bidi control characters */
90            int i = 0;
91            char c;
92            do {
93                c = src.charAt(i++);
94                if(!BidiBase.IsBidiControlChar(c)) {
95                    dest.append(c);
96                }
97            } while(i < src.length());
98            return dest.toString();
99        }
100        default: {
101            StringBuffer dest = new StringBuffer(src.length());
102
103            /* remove Bidi control characters and do mirroring */
104            int i = 0;
105            int c;
106            do {
107                c = UTF16.charAt(src, i);
108                i += UTF16.getCharCount(c);
109                if(!BidiBase.IsBidiControlChar(c)) {
110                    UTF16.append(dest, UCharacter.getMirror(c));
111                }
112            } while(i < src.length());
113                return dest.toString();
114            }
115        } /* end of switch */
116    }
117
118    private static String doWriteForward(char[] text, int start, int limit,
119                                         int options) {
120        return doWriteForward(new String(text, start, limit - start), options);
121    }
122
123    static String writeReverse(String src, int options) {
124        /*
125         * RTL run -
126         *
127         * RTL runs need to be copied to the destination in reverse order
128         * of code points, not code units, to keep Unicode characters intact.
129         *
130         * The general strategy for this is to read the source text
131         * in backward order, collect all code units for a code point
132         * (and optionally following combining characters, see below),
133         * and copy all these code units in ascending order
134         * to the destination for this run.
135         *
136         * Several options request whether combining characters
137         * should be kept after their base characters,
138         * whether Bidi control characters should be removed, and
139         * whether characters should be replaced by their mirror-image
140         * equivalent Unicode characters.
141         */
142        StringBuffer dest = new StringBuffer(src.length());
143
144        /* optimize for several combinations of options */
145        switch (options &
146                (BidiBase.REMOVE_BIDI_CONTROLS |
147                 BidiBase.DO_MIRRORING |
148                 BidiBase.KEEP_BASE_COMBINING)) {
149
150        case 0:
151            /*
152             * With none of the "complicated" options set, the destination
153             * run will have the same length as the source run,
154             * and there is no mirroring and no keeping combining characters
155             * with their base characters.
156             *
157             * XXX: or dest = UTF16.reverse(new StringBuffer(src));
158             */
159
160            int srcLength = src.length();
161
162            /* preserve character integrity */
163            do {
164                /* i is always after the last code unit known to need to be kept
165                 *  in this segment */
166                int i = srcLength;
167
168                /* collect code units for one base character */
169                srcLength -= UTF16.getCharCount(UTF16.charAt(src,
170                                                             srcLength - 1));
171
172                /* copy this base character */
173                dest.append(src.substring(srcLength, i));
174            } while(srcLength > 0);
175            break;
176
177        case BidiBase.KEEP_BASE_COMBINING:
178            /*
179             * Here, too, the destination
180             * run will have the same length as the source run,
181             * and there is no mirroring.
182             * We do need to keep combining characters with their base
183             * characters.
184             */
185            srcLength = src.length();
186
187            /* preserve character integrity */
188            do {
189                /* i is always after the last code unit known to need to be kept
190                 *  in this segment */
191                int c;
192                int i = srcLength;
193
194                /* collect code units and modifier letters for one base
195                 * character */
196                do {
197                    c = UTF16.charAt(src, srcLength - 1);
198                    srcLength -= UTF16.getCharCount(c);
199                } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
200
201                /* copy this "user character" */
202                dest.append(src.substring(srcLength, i));
203            } while(srcLength > 0);
204            break;
205
206        default:
207            /*
208             * With several "complicated" options set, this is the most
209             * general and the slowest copying of an RTL run.
210             * We will do mirroring, remove Bidi controls, and
211             * keep combining characters with their base characters
212             * as requested.
213             */
214            srcLength = src.length();
215
216            /* preserve character integrity */
217            do {
218                /* i is always after the last code unit known to need to be kept
219                 *  in this segment */
220                int i = srcLength;
221
222                /* collect code units for one base character */
223                int c = UTF16.charAt(src, srcLength - 1);
224                srcLength -= UTF16.getCharCount(c);
225                if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) {
226                    /* collect modifier letters for this base character */
227                    while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
228                        c = UTF16.charAt(src, srcLength - 1);
229                        srcLength -= UTF16.getCharCount(c);
230                    }
231                }
232
233                if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 &&
234                    BidiBase.IsBidiControlChar(c)) {
235                    /* do not copy this Bidi control character */
236                    continue;
237                }
238
239                /* copy this "user character" */
240                int j = srcLength;
241                if((options & BidiBase.DO_MIRRORING) != 0) {
242                    /* mirror only the base character */
243                    c = UCharacter.getMirror(c);
244                    UTF16.append(dest, c);
245                    j += UTF16.getCharCount(c);
246                }
247                dest.append(src.substring(j, i));
248            } while(srcLength > 0);
249            break;
250        } /* end of switch */
251
252        return dest.toString();
253    }
254
255    static String doWriteReverse(char[] text, int start, int limit, int options) {
256        return writeReverse(new String(text, start, limit - start), options);
257    }
258
259    static String writeReordered(BidiBase bidi, int options) {
260        int run, runCount;
261        StringBuilder dest;
262        char[] text = bidi.text;
263        runCount = bidi.countRuns();
264
265        /*
266         * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the
267         * reordering mode (checked below) is appropriate.
268         */
269        if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) {
270            options |= BidiBase.INSERT_LRM_FOR_NUMERIC;
271            options &= ~BidiBase.REMOVE_BIDI_CONTROLS;
272        }
273        /*
274         * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS
275         * and cancels BidiBase.INSERT_LRM_FOR_NUMERIC.
276         */
277        if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) {
278            options |= BidiBase.REMOVE_BIDI_CONTROLS;
279            options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
280        }
281        /*
282         * If we do not perform the "inverse Bidi" algorithm, then we
283         * don't need to insert any LRMs, and don't need to test for it.
284         */
285        if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) &&
286            (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT)  &&
287            (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
288            (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) {
289            options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
290        }
291        dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ?
292                                 bidi.length * 2 : bidi.length);
293        /*
294         * Iterate through all visual runs and copy the run text segments to
295         * the destination, according to the options.
296         *
297         * The tests for where to insert LRMs ignore the fact that there may be
298         * BN codes or non-BMP code points at the beginning and end of a run;
299         * they may insert LRMs unnecessarily but the tests are faster this way
300         * (this would have to be improved for UTF-8).
301         */
302        if ((options & BidiBase.OUTPUT_REVERSE) == 0) {
303            /* forward output */
304            if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
305                /* do not insert Bidi controls */
306                for (run = 0; run < runCount; ++run) {
307                    BidiRun bidiRun = bidi.getVisualRun(run);
308                    if (bidiRun.isEvenRun()) {
309                        dest.append(doWriteForward(text, bidiRun.start,
310                                                   bidiRun.limit,
311                                                   options & ~BidiBase.DO_MIRRORING));
312                     } else {
313                        dest.append(doWriteReverse(text, bidiRun.start,
314                                                   bidiRun.limit, options));
315                     }
316                }
317            } else {
318                /* insert Bidi controls for "inverse Bidi" */
319                byte[] dirProps = bidi.dirProps;
320                char uc;
321                int markFlag;
322
323                for (run = 0; run < runCount; ++run) {
324                    BidiRun bidiRun = bidi.getVisualRun(run);
325                    markFlag=0;
326                    /* check if something relevant in insertPoints */
327                    markFlag = bidi.runs[run].insertRemove;
328                    if (markFlag < 0) { /* bidi controls count */
329                        markFlag = 0;
330                    }
331                    if (bidiRun.isEvenRun()) {
332                        if (bidi.isInverse() &&
333                                dirProps[bidiRun.start] != BidiBase.L) {
334                            markFlag |= BidiBase.LRM_BEFORE;
335                        }
336                        if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
337                            uc = LRM_CHAR;
338                        } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
339                            uc = RLM_CHAR;
340                        } else {
341                            uc = 0;
342                        }
343                        if (uc != 0) {
344                            dest.append(uc);
345                        }
346                        dest.append(doWriteForward(text,
347                                                   bidiRun.start, bidiRun.limit,
348                                                   options & ~BidiBase.DO_MIRRORING));
349
350                        if (bidi.isInverse() &&
351                             dirProps[bidiRun.limit - 1] != BidiBase.L) {
352                            markFlag |= BidiBase.LRM_AFTER;
353                        }
354                        if ((markFlag & BidiBase.LRM_AFTER) != 0) {
355                            uc = LRM_CHAR;
356                        } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
357                            uc = RLM_CHAR;
358                        } else {
359                            uc = 0;
360                        }
361                        if (uc != 0) {
362                            dest.append(uc);
363                        }
364                    } else { /* RTL run */
365                        if (bidi.isInverse() &&
366                            !bidi.testDirPropFlagAt(MASK_R_AL,
367                                                    bidiRun.limit - 1)) {
368                            markFlag |= BidiBase.RLM_BEFORE;
369                        }
370                        if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
371                            uc = LRM_CHAR;
372                        } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
373                            uc = RLM_CHAR;
374                        } else {
375                            uc = 0;
376                        }
377                        if (uc != 0) {
378                            dest.append(uc);
379                        }
380                        dest.append(doWriteReverse(text, bidiRun.start,
381                                                   bidiRun.limit, options));
382
383                        if(bidi.isInverse() &&
384                                (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
385                            markFlag |= BidiBase.RLM_AFTER;
386                        }
387                        if ((markFlag & BidiBase.LRM_AFTER) != 0) {
388                            uc = LRM_CHAR;
389                        } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
390                            uc = RLM_CHAR;
391                        } else {
392                            uc = 0;
393                        }
394                        if (uc != 0) {
395                            dest.append(uc);
396                        }
397                    }
398                }
399            }
400        } else {
401            /* reverse output */
402            if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
403                /* do not insert Bidi controls */
404                for(run = runCount; --run >= 0; ) {
405                    BidiRun bidiRun = bidi.getVisualRun(run);
406                    if (bidiRun.isEvenRun()) {
407                        dest.append(doWriteReverse(text,
408                                                   bidiRun.start, bidiRun.limit,
409                                                   options & ~BidiBase.DO_MIRRORING));
410                    } else {
411                        dest.append(doWriteForward(text, bidiRun.start,
412                                                   bidiRun.limit, options));
413                    }
414                }
415            } else {
416                /* insert Bidi controls for "inverse Bidi" */
417
418                byte[] dirProps = bidi.dirProps;
419
420                for (run = runCount; --run >= 0; ) {
421                    /* reverse output */
422                    BidiRun bidiRun = bidi.getVisualRun(run);
423                    if (bidiRun.isEvenRun()) {
424                        if (dirProps[bidiRun.limit - 1] != BidiBase.L) {
425                            dest.append(LRM_CHAR);
426                        }
427
428                        dest.append(doWriteReverse(text, bidiRun.start,
429                                bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
430
431                        if (dirProps[bidiRun.start] != BidiBase.L) {
432                            dest.append(LRM_CHAR);
433                        }
434                    } else {
435                        if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
436                            dest.append(RLM_CHAR);
437                        }
438
439                        dest.append(doWriteForward(text, bidiRun.start,
440                                                   bidiRun.limit, options));
441
442                        if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
443                            dest.append(RLM_CHAR);
444                        }
445                    }
446                }
447            }
448        }
449
450        return dest.toString();
451    }
452}
453