1/*
2******************************************************************************
3*
4*   Copyright (C) 2002-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ucnvbocu.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2002mar27
14*   created by: Markus W. Scherer
15*
16*   This is an implementation of the Binary Ordered Compression for Unicode,
17*   in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
18*/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_CONVERSION
23
24#include "unicode/ucnv.h"
25#include "unicode/ucnv_cb.h"
26#include "unicode/utf16.h"
27#include "putilimp.h"
28#include "ucnv_bld.h"
29#include "ucnv_cnv.h"
30#include "uassert.h"
31
32/* BOCU-1 constants and macros ---------------------------------------------- */
33
34/*
35 * BOCU-1 encodes the code points of a Unicode string as
36 * a sequence of byte-encoded differences (slope detection),
37 * preserving lexical order.
38 *
39 * Optimize the difference-taking for runs of Unicode text within
40 * small scripts:
41 *
42 * Most small scripts are allocated within aligned 128-blocks of Unicode
43 * code points. Lexical order is preserved if the "previous code point" state
44 * is always moved into the middle of such a block.
45 *
46 * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
47 * areas into the middle of those areas.
48 *
49 * C0 control codes and space are encoded with their US-ASCII bytes.
50 * "prev" is reset for C0 controls but not for space.
51 */
52
/* Start value of the "prev" state: the middle of the ASCII range. */
#define BOCU1_ASCII_PREV        0x40

/* Byte values that bound the difference encoding. */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe
#define BOCU1_MAX_TRAIL         0xff
#define BOCU1_RESET             0xff

/* How many distinct lead byte values there are. */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/*
 * Some C0 control byte values double as trail bytes;
 * these two values account for them in the trail byte arithmetic.
 */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* How many distinct trail byte values there are. */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * How many single-byte codes exist on each side of zero
 * (the difference 0, encoded as BOCU1_MIDDLE, counts as positive).
 */
#define BOCU1_SINGLE            64

/* How many lead bytes start a 2-, 3- or 4-byte sequence, per sign. */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* Largest and smallest difference that fits into one byte. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* Largest and smallest difference that fits into two bytes. */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* Largest and smallest difference that fits into three bytes. */
#define BOCU1_REACH_POS_3   (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_3   (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* First lead byte of each positive multi-byte range. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     /* BOCU1_START_POS_4==BOCU1_MAX_LEAD */

/* Lower bound (inclusive) of the lead bytes of each negative range. */
#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     /* BOCU1_START_NEG_4==BOCU1_MIN+1 */

/*
 * Number of bytes (1..4) in the sequence that starts with this lead byte.
 * Not meaningful for BOCU1_RESET. The argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    (((lead)>=BOCU1_START_NEG_2 && (lead)<BOCU1_START_POS_2) ? 1 : \
     ((lead)>=BOCU1_START_NEG_3 && (lead)<BOCU1_START_POS_3) ? 2 : \
     ((lead)>=BOCU1_START_NEG_4 && (lead)<BOCU1_START_POS_4) ? 3 : 4)

/*
 * Number of bytes (1..4) in a packed sequence:
 * the top byte holds the length for 1..3 bytes; anything larger there is
 * the lead byte of a 4-byte sequence.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)>=0x04000000 ? 4 : (packed)>>24)
119/*
120 * 12 commonly used C0 control codes (and space) are only used to encode
121 * themselves directly,
122 * which makes BOCU-1 MIME-usable and reasonably safe for
123 * ASCII-oriented software.
124 *
125 * These controls are
126 *  0   NUL
127 *
128 *  7   BEL
129 *  8   BS
130 *
131 *  9   TAB
132 *  a   LF
133 *  b   VT
134 *  c   FF
135 *  d   CR
136 *
137 *  e   SO
138 *  f   SI
139 *
140 * 1a   SUB
141 * 1b   ESC
142 *
143 * The other 20 C0 controls are also encoded directly (to preserve order)
144 * but are also used as trail bytes in difference encoding
145 * (for better compression).
146 */
/*
 * Map a trail value t (0..BOCU1_TRAIL_COUNT-1) to its external byte value:
 * the first BOCU1_TRAIL_CONTROLS_COUNT values go through the
 * bocu1TrailToByte[] table, all others are shifted by a fixed offset.
 * The argument is evaluated more than once.
 */
#define BOCU1_TRAIL_TO_BYTE(t) ((t)<BOCU1_TRAIL_CONTROLS_COUNT ? bocu1TrailToByte[t] : (t)+BOCU1_TRAIL_BYTE_OFFSET)
148
149/*
150 * Byte value map for control codes,
151 * from external byte values 0x00..0x20
152 * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
153 * External byte values that are illegal as trail bytes are mapped to -1.
154 */
155static const int8_t
156bocu1ByteToTrail[BOCU1_MIN]={
157/*  0     1     2     3     4     5     6     7    */
158    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
159
160/*  8     9     a     b     c     d     e     f    */
161    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
162
163/*  10    11    12    13    14    15    16    17   */
164    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
165
166/*  18    19    1a    1b    1c    1d    1e    1f   */
167    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
168
169/*  20   */
170    -1
171};
172
173/*
174 * Byte value map for control codes,
175 * from trail byte values 0..19 (0..0x13) as used in the difference calculation
176 * to external byte values 0x00..0x20.
177 */
178static const int8_t
179bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
180/*  0     1     2     3     4     5     6     7    */
181    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
182
183/*  8     9     a     b     c     d     e     f    */
184    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
185
186/*  10    11    12    13   */
187    0x1c, 0x1d, 0x1e, 0x1f
188};
189
/**
 * Integer division and modulo with negative numerators
 * yield negative modulo results and quotients that are one more than
 * what we need here.
 * This macro adjusts the results so that the modulo-value m is always >=0.
 *
 * For positive n, the if() condition is always FALSE.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * a semicolon forms exactly one statement and can be used safely as the
 * unbraced body of an if/else.
 * The arguments are evaluated more than once.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) do { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
} while(0)
211
212/* Faster versions of packDiff() for single-byte-encoded diff values. */
213
/** TRUE if the difference value fits into a single byte. */
#define DIFF_IS_SINGLE(diff) ((diff)>=BOCU1_REACH_NEG_1 && (diff)<=BOCU1_REACH_POS_1)

/** Single-byte code for a difference value for which DIFF_IS_SINGLE() holds. */
#define PACK_SINGLE_DIFF(diff) ((diff)+BOCU1_MIDDLE)

/** TRUE if the difference value fits into at most two bytes. */
#define DIFF_IS_DOUBLE(diff) ((diff)>=BOCU1_REACH_NEG_2 && (diff)<=BOCU1_REACH_POS_2)
222
223/* BOCU-1 implementation functions ------------------------------------------ */
224
225#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
226
227/**
228 * Compute the next "previous" value for differencing
229 * from the current code point.
230 *
231 * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
232 * @return "previous code point" state value
233 */
234static inline int32_t
235bocu1Prev(int32_t c) {
236    /* compute new prev */
237    if(/* 0x3040<=c && */ c<=0x309f) {
238        /* Hiragana is not 128-aligned */
239        return 0x3070;
240    } else if(0x4e00<=c && c<=0x9fa5) {
241        /* CJK Unihan */
242        return 0x4e00-BOCU1_REACH_NEG_2;
243    } else if(0xac00<=c /* && c<=0xd7a3 */) {
244        /* Korean Hangul */
245        return (0xd7a3+0xac00)/2;
246    } else {
247        /* mostly small scripts */
248        return BOCU1_SIMPLE_PREV(c);
249    }
250}
251
252/** Fast version of bocu1Prev() for most scripts. */
253#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
254
255/*
256 * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
257 * The UConverter fields are used as follows:
258 *
259 * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
260 *
261 * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
262 * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
263 */
264
265/* BOCU-1-from-Unicode conversion functions --------------------------------- */
266
267/**
268 * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
269 * and return a packed integer with them.
270 *
271 * The encoding favors small absolute differences with short encodings
272 * to compress runs of same-script characters.
273 *
274 * Optimized version with unrolled loops and fewer floating-point operations
275 * than the standard packDiff().
276 *
277 * @param diff difference value -0x10ffff..0x10ffff
278 * @return
279 *      0x010000zz for 1-byte sequence zz
280 *      0x0200yyzz for 2-byte sequence yy zz
281 *      0x03xxyyzz for 3-byte sequence xx yy zz
282 *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
283 */
284static int32_t
285packDiff(int32_t diff) {
286    int32_t result, m;
287
288    U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
289    if(diff>=BOCU1_REACH_NEG_1) {
290        /* mostly positive differences, and single-byte negative ones */
291#if 0   /* single-byte case handled in macros, see below */
292        if(diff<=BOCU1_REACH_POS_1) {
293            /* single byte */
294            return 0x01000000|(BOCU1_MIDDLE+diff);
295        } else
296#endif
297        if(diff<=BOCU1_REACH_POS_2) {
298            /* two bytes */
299            diff-=BOCU1_REACH_POS_1+1;
300            result=0x02000000;
301
302            m=diff%BOCU1_TRAIL_COUNT;
303            diff/=BOCU1_TRAIL_COUNT;
304            result|=BOCU1_TRAIL_TO_BYTE(m);
305
306            result|=(BOCU1_START_POS_2+diff)<<8;
307        } else if(diff<=BOCU1_REACH_POS_3) {
308            /* three bytes */
309            diff-=BOCU1_REACH_POS_2+1;
310            result=0x03000000;
311
312            m=diff%BOCU1_TRAIL_COUNT;
313            diff/=BOCU1_TRAIL_COUNT;
314            result|=BOCU1_TRAIL_TO_BYTE(m);
315
316            m=diff%BOCU1_TRAIL_COUNT;
317            diff/=BOCU1_TRAIL_COUNT;
318            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
319
320            result|=(BOCU1_START_POS_3+diff)<<16;
321        } else {
322            /* four bytes */
323            diff-=BOCU1_REACH_POS_3+1;
324
325            m=diff%BOCU1_TRAIL_COUNT;
326            diff/=BOCU1_TRAIL_COUNT;
327            result=BOCU1_TRAIL_TO_BYTE(m);
328
329            m=diff%BOCU1_TRAIL_COUNT;
330            diff/=BOCU1_TRAIL_COUNT;
331            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
332
333            /*
334             * We know that / and % would deliver quotient 0 and rest=diff.
335             * Avoid division and modulo for performance.
336             */
337            result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
338
339            result|=((uint32_t)BOCU1_START_POS_4)<<24;
340        }
341    } else {
342        /* two- to four-byte negative differences */
343        if(diff>=BOCU1_REACH_NEG_2) {
344            /* two bytes */
345            diff-=BOCU1_REACH_NEG_1;
346            result=0x02000000;
347
348            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
349            result|=BOCU1_TRAIL_TO_BYTE(m);
350
351            result|=(BOCU1_START_NEG_2+diff)<<8;
352        } else if(diff>=BOCU1_REACH_NEG_3) {
353            /* three bytes */
354            diff-=BOCU1_REACH_NEG_2;
355            result=0x03000000;
356
357            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
358            result|=BOCU1_TRAIL_TO_BYTE(m);
359
360            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
361            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
362
363            result|=(BOCU1_START_NEG_3+diff)<<16;
364        } else {
365            /* four bytes */
366            diff-=BOCU1_REACH_NEG_3;
367
368            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
369            result=BOCU1_TRAIL_TO_BYTE(m);
370
371            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
372            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
373
374            /*
375             * We know that NEGDIVMOD would deliver
376             * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
377             * Avoid division and modulo for performance.
378             */
379            m=diff+BOCU1_TRAIL_COUNT;
380            result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
381
382            result|=BOCU1_MIN<<24;
383        }
384    }
385    return result;
386}
387
388
389static void
390_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
391                             UErrorCode *pErrorCode) {
392    UConverter *cnv;
393    const UChar *source, *sourceLimit;
394    uint8_t *target;
395    int32_t targetCapacity;
396    int32_t *offsets;
397
398    int32_t prev, c, diff;
399
400    int32_t sourceIndex, nextSourceIndex;
401
402U_ALIGN_CODE(16)
403
404    /* set up the local pointers */
405    cnv=pArgs->converter;
406    source=pArgs->source;
407    sourceLimit=pArgs->sourceLimit;
408    target=(uint8_t *)pArgs->target;
409    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
410    offsets=pArgs->offsets;
411
412    /* get the converter state from UConverter */
413    c=cnv->fromUChar32;
414    prev=(int32_t)cnv->fromUnicodeStatus;
415    if(prev==0) {
416        prev=BOCU1_ASCII_PREV;
417    }
418
419    /* sourceIndex=-1 if the current character began in the previous buffer */
420    sourceIndex= c==0 ? 0 : -1;
421    nextSourceIndex=0;
422
423    /* conversion loop */
424    if(c!=0 && targetCapacity>0) {
425        goto getTrail;
426    }
427
428fastSingle:
429    /* fast loop for single-byte differences */
430    /* use only one loop counter variable, targetCapacity, not also source */
431    diff=(int32_t)(sourceLimit-source);
432    if(targetCapacity>diff) {
433        targetCapacity=diff;
434    }
435    while(targetCapacity>0 && (c=*source)<0x3000) {
436        if(c<=0x20) {
437            if(c!=0x20) {
438                prev=BOCU1_ASCII_PREV;
439            }
440            *target++=(uint8_t)c;
441            *offsets++=nextSourceIndex++;
442            ++source;
443            --targetCapacity;
444        } else {
445            diff=c-prev;
446            if(DIFF_IS_SINGLE(diff)) {
447                prev=BOCU1_SIMPLE_PREV(c);
448                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
449                *offsets++=nextSourceIndex++;
450                ++source;
451                --targetCapacity;
452            } else {
453                break;
454            }
455        }
456    }
457    /* restore real values */
458    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
459    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
460
461    /* regular loop for all cases */
462    while(source<sourceLimit) {
463        if(targetCapacity>0) {
464            c=*source++;
465            ++nextSourceIndex;
466
467            if(c<=0x20) {
468                /*
469                 * ISO C0 control & space:
470                 * Encode directly for MIME compatibility,
471                 * and reset state except for space, to not disrupt compression.
472                 */
473                if(c!=0x20) {
474                    prev=BOCU1_ASCII_PREV;
475                }
476                *target++=(uint8_t)c;
477                *offsets++=sourceIndex;
478                --targetCapacity;
479
480                sourceIndex=nextSourceIndex;
481                continue;
482            }
483
484            if(U16_IS_LEAD(c)) {
485getTrail:
486                if(source<sourceLimit) {
487                    /* test the following code unit */
488                    UChar trail=*source;
489                    if(U16_IS_TRAIL(trail)) {
490                        ++source;
491                        ++nextSourceIndex;
492                        c=U16_GET_SUPPLEMENTARY(c, trail);
493                    }
494                } else {
495                    /* no more input */
496                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
497                    break;
498                }
499            }
500
501            /*
502             * all other Unicode code points c==U+0021..U+10ffff
503             * are encoded with the difference c-prev
504             *
505             * a new prev is computed from c,
506             * placed in the middle of a 0x80-block (for most small scripts) or
507             * in the middle of the Unihan and Hangul blocks
508             * to statistically minimize the following difference
509             */
510            diff=c-prev;
511            prev=BOCU1_PREV(c);
512            if(DIFF_IS_SINGLE(diff)) {
513                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
514                *offsets++=sourceIndex;
515                --targetCapacity;
516                sourceIndex=nextSourceIndex;
517                if(c<0x3000) {
518                    goto fastSingle;
519                }
520            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
521                /* optimize 2-byte case */
522                int32_t m;
523
524                if(diff>=0) {
525                    diff-=BOCU1_REACH_POS_1+1;
526                    m=diff%BOCU1_TRAIL_COUNT;
527                    diff/=BOCU1_TRAIL_COUNT;
528                    diff+=BOCU1_START_POS_2;
529                } else {
530                    diff-=BOCU1_REACH_NEG_1;
531                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
532                    diff+=BOCU1_START_NEG_2;
533                }
534                *target++=(uint8_t)diff;
535                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
536                *offsets++=sourceIndex;
537                *offsets++=sourceIndex;
538                targetCapacity-=2;
539                sourceIndex=nextSourceIndex;
540            } else {
541                int32_t length; /* will be 2..4 */
542
543                diff=packDiff(diff);
544                length=BOCU1_LENGTH_FROM_PACKED(diff);
545
546                /* write the output character bytes from diff and length */
547                /* from the first if in the loop we know that targetCapacity>0 */
548                if(length<=targetCapacity) {
549                    switch(length) {
550                        /* each branch falls through to the next one */
551                    case 4:
552                        *target++=(uint8_t)(diff>>24);
553                        *offsets++=sourceIndex;
554                    case 3: /*fall through*/
555                        *target++=(uint8_t)(diff>>16);
556                        *offsets++=sourceIndex;
557                    case 2: /*fall through*/
558                        *target++=(uint8_t)(diff>>8);
559                        *offsets++=sourceIndex;
560                    /* case 1: handled above */
561                        *target++=(uint8_t)diff;
562                        *offsets++=sourceIndex;
563                    default:
564                        /* will never occur */
565                        break;
566                    }
567                    targetCapacity-=length;
568                    sourceIndex=nextSourceIndex;
569                } else {
570                    uint8_t *charErrorBuffer;
571
572                    /*
573                     * We actually do this backwards here:
574                     * In order to save an intermediate variable, we output
575                     * first to the overflow buffer what does not fit into the
576                     * regular target.
577                     */
578                    /* we know that 1<=targetCapacity<length<=4 */
579                    length-=targetCapacity;
580                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
581                    switch(length) {
582                        /* each branch falls through to the next one */
583                    case 3:
584                        *charErrorBuffer++=(uint8_t)(diff>>16);
585                    case 2: /*fall through*/
586                        *charErrorBuffer++=(uint8_t)(diff>>8);
587                    case 1: /*fall through*/
588                        *charErrorBuffer=(uint8_t)diff;
589                    default:
590                        /* will never occur */
591                        break;
592                    }
593                    cnv->charErrorBufferLength=(int8_t)length;
594
595                    /* now output what fits into the regular target */
596                    diff>>=8*length; /* length was reduced by targetCapacity */
597                    switch(targetCapacity) {
598                        /* each branch falls through to the next one */
599                    case 3:
600                        *target++=(uint8_t)(diff>>16);
601                        *offsets++=sourceIndex;
602                    case 2: /*fall through*/
603                        *target++=(uint8_t)(diff>>8);
604                        *offsets++=sourceIndex;
605                    case 1: /*fall through*/
606                        *target++=(uint8_t)diff;
607                        *offsets++=sourceIndex;
608                    default:
609                        /* will never occur */
610                        break;
611                    }
612
613                    /* target overflow */
614                    targetCapacity=0;
615                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
616                    break;
617                }
618            }
619        } else {
620            /* target is full */
621            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
622            break;
623        }
624    }
625
626    /* set the converter state back into UConverter */
627    cnv->fromUChar32= c<0 ? -c : 0;
628    cnv->fromUnicodeStatus=(uint32_t)prev;
629
630    /* write back the updated pointers */
631    pArgs->source=source;
632    pArgs->target=(char *)target;
633    pArgs->offsets=offsets;
634}
635
636/*
637 * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
638 * If a change is made in the original function, then either
639 * change this function the same way or
640 * re-copy the original function and remove the variables
641 * offsets, sourceIndex, and nextSourceIndex.
642 */
643static void
644_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
645                  UErrorCode *pErrorCode) {
646    UConverter *cnv;
647    const UChar *source, *sourceLimit;
648    uint8_t *target;
649    int32_t targetCapacity;
650
651    int32_t prev, c, diff;
652
653    /* set up the local pointers */
654    cnv=pArgs->converter;
655    source=pArgs->source;
656    sourceLimit=pArgs->sourceLimit;
657    target=(uint8_t *)pArgs->target;
658    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
659
660    /* get the converter state from UConverter */
661    c=cnv->fromUChar32;
662    prev=(int32_t)cnv->fromUnicodeStatus;
663    if(prev==0) {
664        prev=BOCU1_ASCII_PREV;
665    }
666
667    /* conversion loop */
668    if(c!=0 && targetCapacity>0) {
669        goto getTrail;
670    }
671
672fastSingle:
673    /* fast loop for single-byte differences */
674    /* use only one loop counter variable, targetCapacity, not also source */
675    diff=(int32_t)(sourceLimit-source);
676    if(targetCapacity>diff) {
677        targetCapacity=diff;
678    }
679    while(targetCapacity>0 && (c=*source)<0x3000) {
680        if(c<=0x20) {
681            if(c!=0x20) {
682                prev=BOCU1_ASCII_PREV;
683            }
684            *target++=(uint8_t)c;
685        } else {
686            diff=c-prev;
687            if(DIFF_IS_SINGLE(diff)) {
688                prev=BOCU1_SIMPLE_PREV(c);
689                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
690            } else {
691                break;
692            }
693        }
694        ++source;
695        --targetCapacity;
696    }
697    /* restore real values */
698    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
699
700    /* regular loop for all cases */
701    while(source<sourceLimit) {
702        if(targetCapacity>0) {
703            c=*source++;
704
705            if(c<=0x20) {
706                /*
707                 * ISO C0 control & space:
708                 * Encode directly for MIME compatibility,
709                 * and reset state except for space, to not disrupt compression.
710                 */
711                if(c!=0x20) {
712                    prev=BOCU1_ASCII_PREV;
713                }
714                *target++=(uint8_t)c;
715                --targetCapacity;
716                continue;
717            }
718
719            if(U16_IS_LEAD(c)) {
720getTrail:
721                if(source<sourceLimit) {
722                    /* test the following code unit */
723                    UChar trail=*source;
724                    if(U16_IS_TRAIL(trail)) {
725                        ++source;
726                        c=U16_GET_SUPPLEMENTARY(c, trail);
727                    }
728                } else {
729                    /* no more input */
730                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
731                    break;
732                }
733            }
734
735            /*
736             * all other Unicode code points c==U+0021..U+10ffff
737             * are encoded with the difference c-prev
738             *
739             * a new prev is computed from c,
740             * placed in the middle of a 0x80-block (for most small scripts) or
741             * in the middle of the Unihan and Hangul blocks
742             * to statistically minimize the following difference
743             */
744            diff=c-prev;
745            prev=BOCU1_PREV(c);
746            if(DIFF_IS_SINGLE(diff)) {
747                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
748                --targetCapacity;
749                if(c<0x3000) {
750                    goto fastSingle;
751                }
752            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
753                /* optimize 2-byte case */
754                int32_t m;
755
756                if(diff>=0) {
757                    diff-=BOCU1_REACH_POS_1+1;
758                    m=diff%BOCU1_TRAIL_COUNT;
759                    diff/=BOCU1_TRAIL_COUNT;
760                    diff+=BOCU1_START_POS_2;
761                } else {
762                    diff-=BOCU1_REACH_NEG_1;
763                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
764                    diff+=BOCU1_START_NEG_2;
765                }
766                *target++=(uint8_t)diff;
767                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
768                targetCapacity-=2;
769            } else {
770                int32_t length; /* will be 2..4 */
771
772                diff=packDiff(diff);
773                length=BOCU1_LENGTH_FROM_PACKED(diff);
774
775                /* write the output character bytes from diff and length */
776                /* from the first if in the loop we know that targetCapacity>0 */
777                if(length<=targetCapacity) {
778                    switch(length) {
779                        /* each branch falls through to the next one */
780                    case 4:
781                        *target++=(uint8_t)(diff>>24);
782                    case 3: /*fall through*/
783                        *target++=(uint8_t)(diff>>16);
784                    /* case 2: handled above */
785                        *target++=(uint8_t)(diff>>8);
786                    /* case 1: handled above */
787                        *target++=(uint8_t)diff;
788                    default:
789                        /* will never occur */
790                        break;
791                    }
792                    targetCapacity-=length;
793                } else {
794                    uint8_t *charErrorBuffer;
795
796                    /*
797                     * We actually do this backwards here:
798                     * In order to save an intermediate variable, we output
799                     * first to the overflow buffer what does not fit into the
800                     * regular target.
801                     */
802                    /* we know that 1<=targetCapacity<length<=4 */
803                    length-=targetCapacity;
804                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
805                    switch(length) {
806                        /* each branch falls through to the next one */
807                    case 3:
808                        *charErrorBuffer++=(uint8_t)(diff>>16);
809                    case 2: /*fall through*/
810                        *charErrorBuffer++=(uint8_t)(diff>>8);
811                    case 1: /*fall through*/
812                        *charErrorBuffer=(uint8_t)diff;
813                    default:
814                        /* will never occur */
815                        break;
816                    }
817                    cnv->charErrorBufferLength=(int8_t)length;
818
819                    /* now output what fits into the regular target */
820                    diff>>=8*length; /* length was reduced by targetCapacity */
821                    switch(targetCapacity) {
822                        /* each branch falls through to the next one */
823                    case 3:
824                        *target++=(uint8_t)(diff>>16);
825                    case 2: /*fall through*/
826                        *target++=(uint8_t)(diff>>8);
827                    case 1: /*fall through*/
828                        *target++=(uint8_t)diff;
829                    default:
830                        /* will never occur */
831                        break;
832                    }
833
834                    /* target overflow */
835                    targetCapacity=0;
836                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
837                    break;
838                }
839            }
840        } else {
841            /* target is full */
842            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
843            break;
844        }
845    }
846
847    /* set the converter state back into UConverter */
848    cnv->fromUChar32= c<0 ? -c : 0;
849    cnv->fromUnicodeStatus=(uint32_t)prev;
850
851    /* write back the updated pointers */
852    pArgs->source=source;
853    pArgs->target=(char *)target;
854}
855
856/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
857
858/**
859 * Function for BOCU-1 decoder; handles multi-byte lead bytes.
860 *
861 * @param b lead byte;
862 *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
863 * @return (diff<<2)|count
864 */
865static inline int32_t
866decodeBocu1LeadByte(int32_t b) {
867    int32_t diff, count;
868
869    if(b>=BOCU1_START_NEG_2) {
870        /* positive difference */
871        if(b<BOCU1_START_POS_3) {
872            /* two bytes */
873            diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
874            count=1;
875        } else if(b<BOCU1_START_POS_4) {
876            /* three bytes */
877            diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
878            count=2;
879        } else {
880            /* four bytes */
881            diff=BOCU1_REACH_POS_3+1;
882            count=3;
883        }
884    } else {
885        /* negative difference */
886        if(b>=BOCU1_START_NEG_3) {
887            /* two bytes */
888            diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
889            count=1;
890        } else if(b>BOCU1_MIN) {
891            /* three bytes */
892            diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
893            count=2;
894        } else {
895            /* four bytes */
896            diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
897            count=3;
898        }
899    }
900
901    /* return the state for decoding the trail byte(s) */
902    return (diff<<2)|count;
903}
904
905/**
906 * Function for BOCU-1 decoder; handles multi-byte trail bytes.
907 *
908 * @param count number of remaining trail bytes including this one
909 * @param b trail byte
910 * @return new delta for diff including b - <0 indicates an error
911 *
912 * @see decodeBocu1
913 */
914static inline int32_t
915decodeBocu1TrailByte(int32_t count, int32_t b) {
916    if(b<=0x20) {
917        /* skip some C0 controls and make the trail byte range contiguous */
918        b=bocu1ByteToTrail[b];
919        /* b<0 for an illegal trail byte value will result in return<0 below */
920#if BOCU1_MAX_TRAIL<0xff
921    } else if(b>BOCU1_MAX_TRAIL) {
922        return -99;
923#endif
924    } else {
925        b-=BOCU1_TRAIL_BYTE_OFFSET;
926    }
927
928    /* add trail byte into difference and decrement count */
929    if(count==1) {
930        return b;
931    } else if(count==2) {
932        return b*BOCU1_TRAIL_COUNT;
933    } else /* count==3 */ {
934        return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
935    }
936}
937
938static void
939_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
940                           UErrorCode *pErrorCode) {
941    UConverter *cnv;
942    const uint8_t *source, *sourceLimit;
943    UChar *target;
944    const UChar *targetLimit;
945    int32_t *offsets;
946
947    int32_t prev, count, diff, c;
948
949    int8_t byteIndex;
950    uint8_t *bytes;
951
952    int32_t sourceIndex, nextSourceIndex;
953
954    /* set up the local pointers */
955    cnv=pArgs->converter;
956    source=(const uint8_t *)pArgs->source;
957    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
958    target=pArgs->target;
959    targetLimit=pArgs->targetLimit;
960    offsets=pArgs->offsets;
961
962    /* get the converter state from UConverter */
963    prev=(int32_t)cnv->toUnicodeStatus;
964    if(prev==0) {
965        prev=BOCU1_ASCII_PREV;
966    }
967    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
968    count=diff&3;
969    diff>>=2;
970
971    byteIndex=cnv->toULength;
972    bytes=cnv->toUBytes;
973
974    /* sourceIndex=-1 if the current character began in the previous buffer */
975    sourceIndex=byteIndex==0 ? 0 : -1;
976    nextSourceIndex=0;
977
978    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
979    if(count>0 && byteIndex>0 && target<targetLimit) {
980        goto getTrail;
981    }
982
983fastSingle:
984    /* fast loop for single-byte differences */
985    /* use count as the only loop counter variable */
986    diff=(int32_t)(sourceLimit-source);
987    count=(int32_t)(pArgs->targetLimit-target);
988    if(count>diff) {
989        count=diff;
990    }
991    while(count>0) {
992        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
993            c=prev+(c-BOCU1_MIDDLE);
994            if(c<0x3000) {
995                *target++=(UChar)c;
996                *offsets++=nextSourceIndex++;
997                prev=BOCU1_SIMPLE_PREV(c);
998            } else {
999                break;
1000            }
1001        } else if(c<=0x20) {
1002            if(c!=0x20) {
1003                prev=BOCU1_ASCII_PREV;
1004            }
1005            *target++=(UChar)c;
1006            *offsets++=nextSourceIndex++;
1007        } else {
1008            break;
1009        }
1010        ++source;
1011        --count;
1012    }
1013    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
1014
1015    /* decode a sequence of single and lead bytes */
1016    while(source<sourceLimit) {
1017        if(target>=targetLimit) {
1018            /* target is full */
1019            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1020            break;
1021        }
1022
1023        ++nextSourceIndex;
1024        c=*source++;
1025        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
1026            /* Write a code point directly from a single-byte difference. */
1027            c=prev+(c-BOCU1_MIDDLE);
1028            if(c<0x3000) {
1029                *target++=(UChar)c;
1030                *offsets++=sourceIndex;
1031                prev=BOCU1_SIMPLE_PREV(c);
1032                sourceIndex=nextSourceIndex;
1033                goto fastSingle;
1034            }
1035        } else if(c<=0x20) {
1036            /*
1037             * Direct-encoded C0 control code or space.
1038             * Reset prev for C0 control codes but not for space.
1039             */
1040            if(c!=0x20) {
1041                prev=BOCU1_ASCII_PREV;
1042            }
1043            *target++=(UChar)c;
1044            *offsets++=sourceIndex;
1045            sourceIndex=nextSourceIndex;
1046            continue;
1047        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
1048            /* Optimize two-byte case. */
1049            if(c>=BOCU1_MIDDLE) {
1050                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
1051            } else {
1052                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
1053            }
1054
1055            /* trail byte */
1056            ++nextSourceIndex;
1057            c=decodeBocu1TrailByte(1, *source++);
1058            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
1059                bytes[0]=source[-2];
1060                bytes[1]=source[-1];
1061                byteIndex=2;
1062                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1063                break;
1064            }
1065        } else if(c==BOCU1_RESET) {
1066            /* only reset the state, no code point */
1067            prev=BOCU1_ASCII_PREV;
1068            sourceIndex=nextSourceIndex;
1069            continue;
1070        } else {
1071            /*
1072             * For multi-byte difference lead bytes, set the decoder state
1073             * with the partial difference value from the lead byte and
1074             * with the number of trail bytes.
1075             */
1076            bytes[0]=(uint8_t)c;
1077            byteIndex=1;
1078
1079            diff=decodeBocu1LeadByte(c);
1080            count=diff&3;
1081            diff>>=2;
1082getTrail:
1083            for(;;) {
1084                if(source>=sourceLimit) {
1085                    goto endloop;
1086                }
1087                ++nextSourceIndex;
1088                c=bytes[byteIndex++]=*source++;
1089
1090                /* trail byte in any position */
1091                c=decodeBocu1TrailByte(count, c);
1092                if(c<0) {
1093                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1094                    goto endloop;
1095                }
1096
1097                diff+=c;
1098                if(--count==0) {
1099                    /* final trail byte, deliver a code point */
1100                    byteIndex=0;
1101                    c=prev+diff;
1102                    if((uint32_t)c>0x10ffff) {
1103                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1104                        goto endloop;
1105                    }
1106                    break;
1107                }
1108            }
1109        }
1110
1111        /* calculate the next prev and output c */
1112        prev=BOCU1_PREV(c);
1113        if(c<=0xffff) {
1114            *target++=(UChar)c;
1115            *offsets++=sourceIndex;
1116        } else {
1117            /* output surrogate pair */
1118            *target++=U16_LEAD(c);
1119            if(target<targetLimit) {
1120                *target++=U16_TRAIL(c);
1121                *offsets++=sourceIndex;
1122                *offsets++=sourceIndex;
1123            } else {
1124                /* target overflow */
1125                *offsets++=sourceIndex;
1126                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
1127                cnv->UCharErrorBufferLength=1;
1128                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1129                break;
1130            }
1131        }
1132        sourceIndex=nextSourceIndex;
1133    }
1134endloop:
1135
1136    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1137        /* set the converter state in UConverter to deal with the next character */
1138        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
1139        cnv->mode=0;
1140    } else {
1141        /* set the converter state back into UConverter */
1142        cnv->toUnicodeStatus=(uint32_t)prev;
1143        cnv->mode=(diff<<2)|count;
1144    }
1145    cnv->toULength=byteIndex;
1146
1147    /* write back the updated pointers */
1148    pArgs->source=(const char *)source;
1149    pArgs->target=target;
1150    pArgs->offsets=offsets;
1151    return;
1152}
1153
1154/*
1155 * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
1156 * If a change is made in the original function, then either
1157 * change this function the same way or
1158 * re-copy the original function and remove the variables
1159 * offsets, sourceIndex, and nextSourceIndex.
1160 */
1161static void
1162_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
1163                UErrorCode *pErrorCode) {
1164    UConverter *cnv;
1165    const uint8_t *source, *sourceLimit;
1166    UChar *target;
1167    const UChar *targetLimit;
1168
1169    int32_t prev, count, diff, c;
1170
1171    int8_t byteIndex;
1172    uint8_t *bytes;
1173
1174U_ALIGN_CODE(16)
1175
1176    /* set up the local pointers */
1177    cnv=pArgs->converter;
1178    source=(const uint8_t *)pArgs->source;
1179    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
1180    target=pArgs->target;
1181    targetLimit=pArgs->targetLimit;
1182
1183    /* get the converter state from UConverter */
1184    prev=(int32_t)cnv->toUnicodeStatus;
1185    if(prev==0) {
1186        prev=BOCU1_ASCII_PREV;
1187    }
1188    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
1189    count=diff&3;
1190    diff>>=2;
1191
1192    byteIndex=cnv->toULength;
1193    bytes=cnv->toUBytes;
1194
1195    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
1196    if(count>0 && byteIndex>0 && target<targetLimit) {
1197        goto getTrail;
1198    }
1199
1200fastSingle:
1201    /* fast loop for single-byte differences */
1202    /* use count as the only loop counter variable */
1203    diff=(int32_t)(sourceLimit-source);
1204    count=(int32_t)(pArgs->targetLimit-target);
1205    if(count>diff) {
1206        count=diff;
1207    }
1208    while(count>0) {
1209        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
1210            c=prev+(c-BOCU1_MIDDLE);
1211            if(c<0x3000) {
1212                *target++=(UChar)c;
1213                prev=BOCU1_SIMPLE_PREV(c);
1214            } else {
1215                break;
1216            }
1217        } else if(c<=0x20) {
1218            if(c!=0x20) {
1219                prev=BOCU1_ASCII_PREV;
1220            }
1221            *target++=(UChar)c;
1222        } else {
1223            break;
1224        }
1225        ++source;
1226        --count;
1227    }
1228
1229    /* decode a sequence of single and lead bytes */
1230    while(source<sourceLimit) {
1231        if(target>=targetLimit) {
1232            /* target is full */
1233            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1234            break;
1235        }
1236
1237        c=*source++;
1238        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
1239            /* Write a code point directly from a single-byte difference. */
1240            c=prev+(c-BOCU1_MIDDLE);
1241            if(c<0x3000) {
1242                *target++=(UChar)c;
1243                prev=BOCU1_SIMPLE_PREV(c);
1244                goto fastSingle;
1245            }
1246        } else if(c<=0x20) {
1247            /*
1248             * Direct-encoded C0 control code or space.
1249             * Reset prev for C0 control codes but not for space.
1250             */
1251            if(c!=0x20) {
1252                prev=BOCU1_ASCII_PREV;
1253            }
1254            *target++=(UChar)c;
1255            continue;
1256        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
1257            /* Optimize two-byte case. */
1258            if(c>=BOCU1_MIDDLE) {
1259                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
1260            } else {
1261                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
1262            }
1263
1264            /* trail byte */
1265            c=decodeBocu1TrailByte(1, *source++);
1266            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
1267                bytes[0]=source[-2];
1268                bytes[1]=source[-1];
1269                byteIndex=2;
1270                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1271                break;
1272            }
1273        } else if(c==BOCU1_RESET) {
1274            /* only reset the state, no code point */
1275            prev=BOCU1_ASCII_PREV;
1276            continue;
1277        } else {
1278            /*
1279             * For multi-byte difference lead bytes, set the decoder state
1280             * with the partial difference value from the lead byte and
1281             * with the number of trail bytes.
1282             */
1283            bytes[0]=(uint8_t)c;
1284            byteIndex=1;
1285
1286            diff=decodeBocu1LeadByte(c);
1287            count=diff&3;
1288            diff>>=2;
1289getTrail:
1290            for(;;) {
1291                if(source>=sourceLimit) {
1292                    goto endloop;
1293                }
1294                c=bytes[byteIndex++]=*source++;
1295
1296                /* trail byte in any position */
1297                c=decodeBocu1TrailByte(count, c);
1298                if(c<0) {
1299                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1300                    goto endloop;
1301                }
1302
1303                diff+=c;
1304                if(--count==0) {
1305                    /* final trail byte, deliver a code point */
1306                    byteIndex=0;
1307                    c=prev+diff;
1308                    if((uint32_t)c>0x10ffff) {
1309                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1310                        goto endloop;
1311                    }
1312                    break;
1313                }
1314            }
1315        }
1316
1317        /* calculate the next prev and output c */
1318        prev=BOCU1_PREV(c);
1319        if(c<=0xffff) {
1320            *target++=(UChar)c;
1321        } else {
1322            /* output surrogate pair */
1323            *target++=U16_LEAD(c);
1324            if(target<targetLimit) {
1325                *target++=U16_TRAIL(c);
1326            } else {
1327                /* target overflow */
1328                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
1329                cnv->UCharErrorBufferLength=1;
1330                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1331                break;
1332            }
1333        }
1334    }
1335endloop:
1336
1337    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1338        /* set the converter state in UConverter to deal with the next character */
1339        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
1340        cnv->mode=0;
1341    } else {
1342        /* set the converter state back into UConverter */
1343        cnv->toUnicodeStatus=(uint32_t)prev;
1344        cnv->mode=(diff<<2)|count;
1345    }
1346    cnv->toULength=byteIndex;
1347
1348    /* write back the updated pointers */
1349    pArgs->source=(const char *)source;
1350    pArgs->target=target;
1351    return;
1352}
1353
1354/* miscellaneous ------------------------------------------------------------ */
1355
/*
 * Implementation table for the BOCU-1 converter.
 * The initializer is positional; NULL entries are slots for which BOCU-1
 * supplies no function.
 * NOTE(review): the meaning of each slot follows the member order of
 * UConverterImpl, which is declared outside this file - confirm there
 * before adding or moving entries.
 */
static const UConverterImpl _Bocu1Impl={
    UCNV_BOCU1, /* converter type */

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    /* the four conversion functions of this file */
    _Bocu1ToUnicode,
    _Bocu1ToUnicodeWithOffsets,
    _Bocu1FromUnicode,
    _Bocu1FromUnicodeWithOffsets,
    NULL,

    NULL,
    NULL,
    NULL,
    NULL,
    ucnv_getCompleteUnicodeSet, /* presumably: BOCU-1 can encode all of Unicode - confirm */

    NULL,
    NULL
};
1381
/*
 * Static description of the BOCU-1 converter: structure size, name, CCSID,
 * platform and converter type, bytes-per-UChar bounds and the
 * substitution character.
 * NOTE(review): the uncommented trailing members follow the member order
 * of UConverterStaticData, which is declared outside this file - confirm
 * their meaning there.
 */
static const UConverterStaticData _Bocu1StaticData={
    sizeof(UConverterStaticData),
    "BOCU-1",
    1214, /* CCSID for BOCU-1 */
    UCNV_IBM, UCNV_BOCU1,
    1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
    { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
1394
/*
 * Shared data object for the BOCU-1 converter; it is the only definition
 * in this part of the file with external linkage and ties together
 * _Bocu1StaticData and _Bocu1Impl.
 * NOTE(review): the remaining positional members follow the member order
 * of UConverterSharedData, which is declared outside this file - confirm
 * their meaning there.
 */
const UConverterSharedData _Bocu1Data={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl,
    0,
    UCNV_MBCS_TABLE_INITIALIZER
};
1401
1402#endif
1403