1/*
2******************************************************************************
3*
4*   Copyright (C) 2000-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ucnvscsu.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2000nov18
14*   created by: Markus W. Scherer
15*
16*   This is an implementation of the Standard Compression Scheme for Unicode
17*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
18*   Reserved commands and window settings are treated as illegal sequences and
19*   will result in callback calls.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_CONVERSION
25
26#include "unicode/ucnv.h"
27#include "unicode/ucnv_cb.h"
28#include "unicode/utf16.h"
29#include "ucnv_bld.h"
30#include "ucnv_cnv.h"
31#include "cmemory.h"
32
33/* SCSU definitions --------------------------------------------------------- */
34
/*
 * SCSU command (tag) byte values.
 * The S* tags are the ones the decoder checks for in single-byte mode,
 * the U* tags the ones it checks for in Unicode mode.
 * For contiguous command ranges only the first and last tag are named.
 */
enum {
    /* ---- single-byte mode ---- */
    SQ0=0x01,   /* Quote from window pair 0 */
    SQ7=0x08,   /* Quote from window pair 7 */
    SDX=0x0B,   /* Define a window as extended */
    Srs=0x0C,   /* reserved */
    SQU=0x0E,   /* Quote a single Unicode character */
    SCU=0x0F,   /* Change to Unicode mode */
    SC0=0x10,   /* Select window 0 */
    SC7=0x17,   /* Select window 7 */
    SD0=0x18,   /* Define and select window 0 */
    SD7=0x1F,   /* Define and select window 7 */

    /* ---- Unicode mode ---- */
    UC0=0xE0,   /* Select window 0 */
    UC7=0xE7,   /* Select window 7 */
    UD0=0xE8,   /* Define and select window 0 */
    UD7=0xEF,   /* Define and select window 7 */
    UQU=0xF0,   /* Quote a single Unicode character */
    UDX=0xF1,   /* Define a Window as extended */
    Urs=0xF2    /* reserved */
};
56
/*
 * Boundaries used when a one-byte "define window" argument is turned
 * into a dynamic window offset.
 */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by a
     * dynamic window, since no short-run alphabets are found in these
     * areas. Therefore gapOffset is added to all values from gapThreshold.
     */
    gapThreshold=0x68,
    gapOffset=0xAC00,

    /* values from reservedStart up to (excluding) fixedThreshold are reserved */
    reservedStart=0xA8,

    /* values from fixedThreshold index the table of predefined fixed offsets */
    fixedThreshold=0xF9
};
72
/*
 * Start offsets of the 8 static windows.
 * All of them lie in the BMP.
 */
static const uint32_t staticOffsets[8]={
    /* 0 */ 0x0000,     /* ASCII for quoted tags */
    /* 1 */ 0x0080,     /* Latin-1 Supplement (for access to punctuation) */
    /* 2 */ 0x0100,     /* Latin Extended-A */
    /* 3 */ 0x0300,     /* Combining Diacritical Marks */
    /* 4 */ 0x2000,     /* General Punctuation */
    /* 5 */ 0x2080,     /* Currency Symbols */
    /* 6 */ 0x2100,     /* Letterlike Symbols and Number Forms */
    /* 7 */ 0x3000      /* CJK Symbols and Punctuation */
};
84
/*
 * Offsets that the 8 dynamic (sliding) windows start out with.
 * _SCSUReset() copies this table into the per-converter state.
 */
static const uint32_t initialDynamicOffsets[8]={
    /* 0 */ 0x0080,     /* Latin-1 */
    /* 1 */ 0x00C0,     /* Latin Extended A */
    /* 2 */ 0x0400,     /* Cyrillic */
    /* 3 */ 0x0600,     /* Arabic */
    /* 4 */ 0x0900,     /* Devanagari */
    /* 5 */ 0x3040,     /* Hiragana */
    /* 6 */ 0x30A0,     /* Katakana */
    /* 7 */ 0xFF00      /* Fullwidth ASCII */
};
96
/*
 * Predefined window offsets, selected by "define window" argument bytes
 * fixedThreshold (0xF9) through 0xFF; index = argument byte - fixedThreshold.
 */
static const uint32_t fixedOffsets[]={
    0x00C0,     /* 0xF9: Latin-1 Letters + half of Latin Extended A */
    0x0250,     /* 0xFA: IPA extensions */
    0x0370,     /* 0xFB: Greek */
    0x0530,     /* 0xFC: Armenian */
    0x3040,     /* 0xFD: Hiragana */
    0x30A0,     /* 0xFE: Katakana */
    0xFF60      /* 0xFF: Halfwidth Katakana */
};
107
/*
 * States of the SCSU-to-Unicode state machine.
 * readCommand means "not inside a multi-byte sequence"; every other
 * value names the byte that the decoder expects next.
 */
enum {
    readCommand=0,      /* expecting a command or character byte */
    quotePairOne=1,     /* expecting the first byte of a quoted 16-bit unit */
    quotePairTwo=2,     /* expecting the second byte of a 16-bit unit */
    quoteOne=3,         /* expecting the single byte quoted from a window */
    definePairOne=4,    /* expecting the first byte of an extended window definition */
    definePairTwo=5,    /* expecting the second byte of an extended window definition */
    defineOne=6         /* expecting the one-byte window offset value */
};
118
119typedef struct SCSUData {
120    /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
121    uint32_t toUDynamicOffsets[8];
122    uint32_t fromUDynamicOffsets[8];
123
124    /* state machine state - toUnicode */
125    UBool toUIsSingleByteMode;
126    uint8_t toUState;
127    int8_t toUQuoteWindow, toUDynamicWindow;
128    uint8_t toUByteOne;
129    uint8_t toUPadding[3];
130
131    /* state machine state - fromUnicode */
132    UBool fromUIsSingleByteMode;
133    int8_t fromUDynamicWindow;
134
135    /*
136     * windowUse[] keeps track of the use of the dynamic windows:
137     * At nextWindowUseIndex there is the least recently used window,
138     * and the following windows (in a wrapping manner) are more and more
139     * recently used.
140     * At nextWindowUseIndex-1 there is the most recently used window.
141     */
142    uint8_t locale;
143    int8_t nextWindowUseIndex;
144    int8_t windowUse[8];
145} SCSUData;
146
/*
 * Initial contents of SCSUData.windowUse[]:
 * the generic order, and the order used when the converter locale is "ja".
 */
static const int8_t initialWindowUse[8]={
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8]={
    3, 2, 4, 1, 0, 7, 5, 6
};
149
/* values for SCSUData.locale */
enum {
    lGeneric=0,     /* any locale other than Japanese */
    l_ja=1          /* "ja" or "ja_..." */
};
153
154/* SCSU setup functions ----------------------------------------------------- */
155
156static void
157_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
158    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
159
160    if(choice<=UCNV_RESET_TO_UNICODE) {
161        /* reset toUnicode */
162        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
163
164        scsu->toUIsSingleByteMode=TRUE;
165        scsu->toUState=readCommand;
166        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
167        scsu->toUByteOne=0;
168
169        cnv->toULength=0;
170    }
171    if(choice!=UCNV_RESET_TO_UNICODE) {
172        /* reset fromUnicode */
173        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
174
175        scsu->fromUIsSingleByteMode=TRUE;
176        scsu->fromUDynamicWindow=0;
177
178        scsu->nextWindowUseIndex=0;
179        switch(scsu->locale) {
180        case l_ja:
181            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
182            break;
183        default:
184            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
185            break;
186        }
187
188        cnv->fromUChar32=0;
189    }
190}
191
192static void
193_SCSUOpen(UConverter *cnv,
194          UConverterLoadArgs *pArgs,
195          UErrorCode *pErrorCode) {
196    const char *locale=pArgs->locale;
197    if(pArgs->onlyTestIsLoadable) {
198        return;
199    }
200    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
201    if(cnv->extraInfo!=NULL) {
202        if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
203            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
204        } else {
205            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
206        }
207        _SCSUReset(cnv, UCNV_RESET_BOTH);
208    } else {
209        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
210    }
211
212    /* Set the substitution character U+fffd as a Unicode string. */
213    cnv->subUChars[0]=0xfffd;
214    cnv->subCharLen=-1;
215}
216
217static void
218_SCSUClose(UConverter *cnv) {
219    if(cnv->extraInfo!=NULL) {
220        if(!cnv->isExtraLocal) {
221            uprv_free(cnv->extraInfo);
222        }
223        cnv->extraInfo=NULL;
224    }
225}
226
227/* SCSU-to-Unicode conversion functions ------------------------------------- */
228
229static void
230_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
231                          UErrorCode *pErrorCode) {
232    UConverter *cnv;
233    SCSUData *scsu;
234    const uint8_t *source, *sourceLimit;
235    UChar *target;
236    const UChar *targetLimit;
237    int32_t *offsets;
238    UBool isSingleByteMode;
239    uint8_t state, byteOne;
240    int8_t quoteWindow, dynamicWindow;
241
242    int32_t sourceIndex, nextSourceIndex;
243
244    uint8_t b;
245
246    /* set up the local pointers */
247    cnv=pArgs->converter;
248    scsu=(SCSUData *)cnv->extraInfo;
249
250    source=(const uint8_t *)pArgs->source;
251    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
252    target=pArgs->target;
253    targetLimit=pArgs->targetLimit;
254    offsets=pArgs->offsets;
255
256    /* get the state machine state */
257    isSingleByteMode=scsu->toUIsSingleByteMode;
258    state=scsu->toUState;
259    quoteWindow=scsu->toUQuoteWindow;
260    dynamicWindow=scsu->toUDynamicWindow;
261    byteOne=scsu->toUByteOne;
262
263    /* sourceIndex=-1 if the current character began in the previous buffer */
264    sourceIndex=state==readCommand ? 0 : -1;
265    nextSourceIndex=0;
266
267    /*
268     * conversion "loop"
269     *
270     * For performance, this is not a normal C loop.
271     * Instead, there are two code blocks for the two SCSU modes.
272     * The function branches to either one, and a change of the mode is done with a goto to
273     * the other branch.
274     *
275     * Each branch has two conventional loops:
276     * - a fast-path loop for the most common codes in the mode
277     * - a loop for all other codes in the mode
278     * When the fast-path runs into a code that it cannot handle, its loop ends and it
279     * runs into the following loop to handle the other codes.
280     * The end of the input or output buffer is also handled by the slower loop.
281     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
282     *
283     * The callback handling is done by returning with an error code.
284     * The conversion framework actually calls the callback function.
285     */
286    if(isSingleByteMode) {
287        /* fast path for single-byte mode */
288        if(state==readCommand) {
289fastSingle:
290            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
291                ++source;
292                ++nextSourceIndex;
293                if(b<=0x7f) {
294                    /* write US-ASCII graphic character or DEL */
295                    *target++=(UChar)b;
296                    if(offsets!=NULL) {
297                        *offsets++=sourceIndex;
298                    }
299                } else {
300                    /* write from dynamic window */
301                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
302                    if(c<=0xffff) {
303                        *target++=(UChar)c;
304                        if(offsets!=NULL) {
305                            *offsets++=sourceIndex;
306                        }
307                    } else {
308                        /* output surrogate pair */
309                        *target++=(UChar)(0xd7c0+(c>>10));
310                        if(target<targetLimit) {
311                            *target++=(UChar)(0xdc00|(c&0x3ff));
312                            if(offsets!=NULL) {
313                                *offsets++=sourceIndex;
314                                *offsets++=sourceIndex;
315                            }
316                        } else {
317                            /* target overflow */
318                            if(offsets!=NULL) {
319                                *offsets++=sourceIndex;
320                            }
321                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
322                            cnv->UCharErrorBufferLength=1;
323                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
324                            goto endloop;
325                        }
326                    }
327                }
328                sourceIndex=nextSourceIndex;
329            }
330        }
331
332        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
333singleByteMode:
334        while(source<sourceLimit) {
335            if(target>=targetLimit) {
336                /* target is full */
337                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
338                break;
339            }
340            b=*source++;
341            ++nextSourceIndex;
342            switch(state) {
343            case readCommand:
344                /* redundant conditions are commented out */
345                /* here: b<0x20 because otherwise we would be in fastSingle */
346                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
347                    /* CR/LF/TAB/NUL */
348                    *target++=(UChar)b;
349                    if(offsets!=NULL) {
350                        *offsets++=sourceIndex;
351                    }
352                    sourceIndex=nextSourceIndex;
353                    goto fastSingle;
354                } else if(SC0<=b) {
355                    if(b<=SC7) {
356                        dynamicWindow=(int8_t)(b-SC0);
357                        sourceIndex=nextSourceIndex;
358                        goto fastSingle;
359                    } else /* if(SD0<=b && b<=SD7) */ {
360                        dynamicWindow=(int8_t)(b-SD0);
361                        state=defineOne;
362                    }
363                } else if(/* SQ0<=b && */ b<=SQ7) {
364                    quoteWindow=(int8_t)(b-SQ0);
365                    state=quoteOne;
366                } else if(b==SDX) {
367                    state=definePairOne;
368                } else if(b==SQU) {
369                    state=quotePairOne;
370                } else if(b==SCU) {
371                    sourceIndex=nextSourceIndex;
372                    isSingleByteMode=FALSE;
373                    goto fastUnicode;
374                } else /* Srs */ {
375                    /* callback(illegal) */
376                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
377                    cnv->toUBytes[0]=b;
378                    cnv->toULength=1;
379                    goto endloop;
380                }
381
382                /* store the first byte of a multibyte sequence in toUBytes[] */
383                cnv->toUBytes[0]=b;
384                cnv->toULength=1;
385                break;
386            case quotePairOne:
387                byteOne=b;
388                cnv->toUBytes[1]=b;
389                cnv->toULength=2;
390                state=quotePairTwo;
391                break;
392            case quotePairTwo:
393                *target++=(UChar)((byteOne<<8)|b);
394                if(offsets!=NULL) {
395                    *offsets++=sourceIndex;
396                }
397                sourceIndex=nextSourceIndex;
398                state=readCommand;
399                goto fastSingle;
400            case quoteOne:
401                if(b<0x80) {
402                    /* all static offsets are in the BMP */
403                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
404                    if(offsets!=NULL) {
405                        *offsets++=sourceIndex;
406                    }
407                } else {
408                    /* write from dynamic window */
409                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
410                    if(c<=0xffff) {
411                        *target++=(UChar)c;
412                        if(offsets!=NULL) {
413                            *offsets++=sourceIndex;
414                        }
415                    } else {
416                        /* output surrogate pair */
417                        *target++=(UChar)(0xd7c0+(c>>10));
418                        if(target<targetLimit) {
419                            *target++=(UChar)(0xdc00|(c&0x3ff));
420                            if(offsets!=NULL) {
421                                *offsets++=sourceIndex;
422                                *offsets++=sourceIndex;
423                            }
424                        } else {
425                            /* target overflow */
426                            if(offsets!=NULL) {
427                                *offsets++=sourceIndex;
428                            }
429                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
430                            cnv->UCharErrorBufferLength=1;
431                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
432                            goto endloop;
433                        }
434                    }
435                }
436                sourceIndex=nextSourceIndex;
437                state=readCommand;
438                goto fastSingle;
439            case definePairOne:
440                dynamicWindow=(int8_t)((b>>5)&7);
441                byteOne=(uint8_t)(b&0x1f);
442                cnv->toUBytes[1]=b;
443                cnv->toULength=2;
444                state=definePairTwo;
445                break;
446            case definePairTwo:
447                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
448                sourceIndex=nextSourceIndex;
449                state=readCommand;
450                goto fastSingle;
451            case defineOne:
452                if(b==0) {
453                    /* callback(illegal): Reserved window offset value 0 */
454                    cnv->toUBytes[1]=b;
455                    cnv->toULength=2;
456                    goto endloop;
457                } else if(b<gapThreshold) {
458                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
459                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
460                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
461                } else if(b>=fixedThreshold) {
462                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
463                } else {
464                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
465                    cnv->toUBytes[1]=b;
466                    cnv->toULength=2;
467                    goto endloop;
468                }
469                sourceIndex=nextSourceIndex;
470                state=readCommand;
471                goto fastSingle;
472            }
473        }
474    } else {
475        /* fast path for Unicode mode */
476        if(state==readCommand) {
477fastUnicode:
478            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
479                *target++=(UChar)((b<<8)|source[1]);
480                if(offsets!=NULL) {
481                    *offsets++=sourceIndex;
482                }
483                sourceIndex=nextSourceIndex;
484                nextSourceIndex+=2;
485                source+=2;
486            }
487        }
488
489        /* normal state machine for Unicode mode */
490/* unicodeByteMode: */
491        while(source<sourceLimit) {
492            if(target>=targetLimit) {
493                /* target is full */
494                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
495                break;
496            }
497            b=*source++;
498            ++nextSourceIndex;
499            switch(state) {
500            case readCommand:
501                if((uint8_t)(b-UC0)>(Urs-UC0)) {
502                    byteOne=b;
503                    cnv->toUBytes[0]=b;
504                    cnv->toULength=1;
505                    state=quotePairTwo;
506                } else if(/* UC0<=b && */ b<=UC7) {
507                    dynamicWindow=(int8_t)(b-UC0);
508                    sourceIndex=nextSourceIndex;
509                    isSingleByteMode=TRUE;
510                    goto fastSingle;
511                } else if(/* UD0<=b && */ b<=UD7) {
512                    dynamicWindow=(int8_t)(b-UD0);
513                    isSingleByteMode=TRUE;
514                    cnv->toUBytes[0]=b;
515                    cnv->toULength=1;
516                    state=defineOne;
517                    goto singleByteMode;
518                } else if(b==UDX) {
519                    isSingleByteMode=TRUE;
520                    cnv->toUBytes[0]=b;
521                    cnv->toULength=1;
522                    state=definePairOne;
523                    goto singleByteMode;
524                } else if(b==UQU) {
525                    cnv->toUBytes[0]=b;
526                    cnv->toULength=1;
527                    state=quotePairOne;
528                } else /* Urs */ {
529                    /* callback(illegal) */
530                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
531                    cnv->toUBytes[0]=b;
532                    cnv->toULength=1;
533                    goto endloop;
534                }
535                break;
536            case quotePairOne:
537                byteOne=b;
538                cnv->toUBytes[1]=b;
539                cnv->toULength=2;
540                state=quotePairTwo;
541                break;
542            case quotePairTwo:
543                *target++=(UChar)((byteOne<<8)|b);
544                if(offsets!=NULL) {
545                    *offsets++=sourceIndex;
546                }
547                sourceIndex=nextSourceIndex;
548                state=readCommand;
549                goto fastUnicode;
550            }
551        }
552    }
553endloop:
554
555    /* set the converter state back into UConverter */
556    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
557        /* reset to deal with the next character */
558        state=readCommand;
559    } else if(state==readCommand) {
560        /* not in a multi-byte sequence, reset toULength */
561        cnv->toULength=0;
562    }
563    scsu->toUIsSingleByteMode=isSingleByteMode;
564    scsu->toUState=state;
565    scsu->toUQuoteWindow=quoteWindow;
566    scsu->toUDynamicWindow=dynamicWindow;
567    scsu->toUByteOne=byteOne;
568
569    /* write back the updated pointers */
570    pArgs->source=(const char *)source;
571    pArgs->target=target;
572    pArgs->offsets=offsets;
573    return;
574}
575
576/*
577 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
578 * If a change is made in the original function, then either
579 * change this function the same way or
580 * re-copy the original function and remove the variables
581 * offsets, sourceIndex, and nextSourceIndex.
582 */
583static void
584_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
585               UErrorCode *pErrorCode) {
586    UConverter *cnv;
587    SCSUData *scsu;
588    const uint8_t *source, *sourceLimit;
589    UChar *target;
590    const UChar *targetLimit;
591    UBool isSingleByteMode;
592    uint8_t state, byteOne;
593    int8_t quoteWindow, dynamicWindow;
594
595    uint8_t b;
596
597    /* set up the local pointers */
598    cnv=pArgs->converter;
599    scsu=(SCSUData *)cnv->extraInfo;
600
601    source=(const uint8_t *)pArgs->source;
602    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
603    target=pArgs->target;
604    targetLimit=pArgs->targetLimit;
605
606    /* get the state machine state */
607    isSingleByteMode=scsu->toUIsSingleByteMode;
608    state=scsu->toUState;
609    quoteWindow=scsu->toUQuoteWindow;
610    dynamicWindow=scsu->toUDynamicWindow;
611    byteOne=scsu->toUByteOne;
612
613    /*
614     * conversion "loop"
615     *
616     * For performance, this is not a normal C loop.
617     * Instead, there are two code blocks for the two SCSU modes.
618     * The function branches to either one, and a change of the mode is done with a goto to
619     * the other branch.
620     *
621     * Each branch has two conventional loops:
622     * - a fast-path loop for the most common codes in the mode
623     * - a loop for all other codes in the mode
624     * When the fast-path runs into a code that it cannot handle, its loop ends and it
625     * runs into the following loop to handle the other codes.
626     * The end of the input or output buffer is also handled by the slower loop.
627     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
628     *
629     * The callback handling is done by returning with an error code.
630     * The conversion framework actually calls the callback function.
631     */
632    if(isSingleByteMode) {
633        /* fast path for single-byte mode */
634        if(state==readCommand) {
635fastSingle:
636            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
637                ++source;
638                if(b<=0x7f) {
639                    /* write US-ASCII graphic character or DEL */
640                    *target++=(UChar)b;
641                } else {
642                    /* write from dynamic window */
643                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
644                    if(c<=0xffff) {
645                        *target++=(UChar)c;
646                    } else {
647                        /* output surrogate pair */
648                        *target++=(UChar)(0xd7c0+(c>>10));
649                        if(target<targetLimit) {
650                            *target++=(UChar)(0xdc00|(c&0x3ff));
651                        } else {
652                            /* target overflow */
653                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
654                            cnv->UCharErrorBufferLength=1;
655                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
656                            goto endloop;
657                        }
658                    }
659                }
660            }
661        }
662
663        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
664singleByteMode:
665        while(source<sourceLimit) {
666            if(target>=targetLimit) {
667                /* target is full */
668                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
669                break;
670            }
671            b=*source++;
672            switch(state) {
673            case readCommand:
674                /* redundant conditions are commented out */
675                /* here: b<0x20 because otherwise we would be in fastSingle */
676                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
677                    /* CR/LF/TAB/NUL */
678                    *target++=(UChar)b;
679                    goto fastSingle;
680                } else if(SC0<=b) {
681                    if(b<=SC7) {
682                        dynamicWindow=(int8_t)(b-SC0);
683                        goto fastSingle;
684                    } else /* if(SD0<=b && b<=SD7) */ {
685                        dynamicWindow=(int8_t)(b-SD0);
686                        state=defineOne;
687                    }
688                } else if(/* SQ0<=b && */ b<=SQ7) {
689                    quoteWindow=(int8_t)(b-SQ0);
690                    state=quoteOne;
691                } else if(b==SDX) {
692                    state=definePairOne;
693                } else if(b==SQU) {
694                    state=quotePairOne;
695                } else if(b==SCU) {
696                    isSingleByteMode=FALSE;
697                    goto fastUnicode;
698                } else /* Srs */ {
699                    /* callback(illegal) */
700                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
701                    cnv->toUBytes[0]=b;
702                    cnv->toULength=1;
703                    goto endloop;
704                }
705
706                /* store the first byte of a multibyte sequence in toUBytes[] */
707                cnv->toUBytes[0]=b;
708                cnv->toULength=1;
709                break;
710            case quotePairOne:
711                byteOne=b;
712                cnv->toUBytes[1]=b;
713                cnv->toULength=2;
714                state=quotePairTwo;
715                break;
716            case quotePairTwo:
717                *target++=(UChar)((byteOne<<8)|b);
718                state=readCommand;
719                goto fastSingle;
720            case quoteOne:
721                if(b<0x80) {
722                    /* all static offsets are in the BMP */
723                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
724                } else {
725                    /* write from dynamic window */
726                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
727                    if(c<=0xffff) {
728                        *target++=(UChar)c;
729                    } else {
730                        /* output surrogate pair */
731                        *target++=(UChar)(0xd7c0+(c>>10));
732                        if(target<targetLimit) {
733                            *target++=(UChar)(0xdc00|(c&0x3ff));
734                        } else {
735                            /* target overflow */
736                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
737                            cnv->UCharErrorBufferLength=1;
738                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
739                            goto endloop;
740                        }
741                    }
742                }
743                state=readCommand;
744                goto fastSingle;
745            case definePairOne:
746                dynamicWindow=(int8_t)((b>>5)&7);
747                byteOne=(uint8_t)(b&0x1f);
748                cnv->toUBytes[1]=b;
749                cnv->toULength=2;
750                state=definePairTwo;
751                break;
752            case definePairTwo:
753                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
754                state=readCommand;
755                goto fastSingle;
756            case defineOne:
757                if(b==0) {
758                    /* callback(illegal): Reserved window offset value 0 */
759                    cnv->toUBytes[1]=b;
760                    cnv->toULength=2;
761                    goto endloop;
762                } else if(b<gapThreshold) {
763                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
764                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
765                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
766                } else if(b>=fixedThreshold) {
767                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
768                } else {
769                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
770                    cnv->toUBytes[1]=b;
771                    cnv->toULength=2;
772                    goto endloop;
773                }
774                state=readCommand;
775                goto fastSingle;
776            }
777        }
778    } else {
779        /* fast path for Unicode mode */
780        if(state==readCommand) {
781fastUnicode:
782            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
783                *target++=(UChar)((b<<8)|source[1]);
784                source+=2;
785            }
786        }
787
788        /* normal state machine for Unicode mode */
789/* unicodeByteMode: */
790        while(source<sourceLimit) {
791            if(target>=targetLimit) {
792                /* target is full */
793                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
794                break;
795            }
796            b=*source++;
797            switch(state) {
798            case readCommand:
799                if((uint8_t)(b-UC0)>(Urs-UC0)) {
800                    byteOne=b;
801                    cnv->toUBytes[0]=b;
802                    cnv->toULength=1;
803                    state=quotePairTwo;
804                } else if(/* UC0<=b && */ b<=UC7) {
805                    dynamicWindow=(int8_t)(b-UC0);
806                    isSingleByteMode=TRUE;
807                    goto fastSingle;
808                } else if(/* UD0<=b && */ b<=UD7) {
809                    dynamicWindow=(int8_t)(b-UD0);
810                    isSingleByteMode=TRUE;
811                    cnv->toUBytes[0]=b;
812                    cnv->toULength=1;
813                    state=defineOne;
814                    goto singleByteMode;
815                } else if(b==UDX) {
816                    isSingleByteMode=TRUE;
817                    cnv->toUBytes[0]=b;
818                    cnv->toULength=1;
819                    state=definePairOne;
820                    goto singleByteMode;
821                } else if(b==UQU) {
822                    cnv->toUBytes[0]=b;
823                    cnv->toULength=1;
824                    state=quotePairOne;
825                } else /* Urs */ {
826                    /* callback(illegal) */
827                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
828                    cnv->toUBytes[0]=b;
829                    cnv->toULength=1;
830                    goto endloop;
831                }
832                break;
833            case quotePairOne:
834                byteOne=b;
835                cnv->toUBytes[1]=b;
836                cnv->toULength=2;
837                state=quotePairTwo;
838                break;
839            case quotePairTwo:
840                *target++=(UChar)((byteOne<<8)|b);
841                state=readCommand;
842                goto fastUnicode;
843            }
844        }
845    }
846endloop:
847
848    /* set the converter state back into UConverter */
849    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
850        /* reset to deal with the next character */
851        state=readCommand;
852    } else if(state==readCommand) {
853        /* not in a multi-byte sequence, reset toULength */
854        cnv->toULength=0;
855    }
856    scsu->toUIsSingleByteMode=isSingleByteMode;
857    scsu->toUState=state;
858    scsu->toUQuoteWindow=quoteWindow;
859    scsu->toUDynamicWindow=dynamicWindow;
860    scsu->toUByteOne=byteOne;
861
862    /* write back the updated pointers */
863    pArgs->source=(const char *)source;
864    pArgs->target=target;
865    return;
866}
867
868/* SCSU-from-Unicode conversion functions ----------------------------------- */
869
870/*
871 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
872 * reasonable results. The lookahead is minimal.
873 * Many cases are simple:
874 * A character fits directly into the current mode, a dynamic or static window,
875 * or is not compressible. These cases are tested first.
876 * Real compression heuristics are applied to the rest, in code branches for
877 * single/Unicode mode and BMP/supplementary code points.
878 * The heuristics used here are extremely simple.
879 */
880
/*
 * Find the window that contains the character c.
 * A window covers the 0x80 code points beginning at its offset.
 * The windows are tested in ascending order, so the lowest-numbered
 * matching window is returned; the result is -1 if none of the eight matches.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t w=0;

    while(w<8) {
        /* for c below the offset the unsigned difference wraps around and fails the test */
        if((uint32_t)(c-offsets[w])<0x80) {
            return w;
        }
        ++w;
    }
    return -1;
}
892
893/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
894static UBool
895isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
896    return (UBool)(c<=offset+0x7f &&
897          (c>=offset || (c<=0x7f &&
898                        (c>=0x20 || (1UL<<c)&0x2601))));
899                                /* binary 0010 0110 0000 0001,
900                                   check for b==0xd || b==0xa || b==9 || b==0 */
901}
902
903/*
904 * getNextDynamicWindow returns the next dynamic window to be redefined
905 */
906static int8_t
907getNextDynamicWindow(SCSUData *scsu) {
908    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
909    if(++scsu->nextWindowUseIndex==8) {
910        scsu->nextWindowUseIndex=0;
911    }
912    return window;
913}
914
915/*
916 * useDynamicWindow() adjusts
917 * windowUse[] and nextWindowUseIndex for the algorithm to choose
918 * the next dynamic window to be defined;
919 * a subclass may override it and provide its own algorithm.
920 */
921static void
922useDynamicWindow(SCSUData *scsu, int8_t window) {
923    /*
924     * move the existing window, which just became the most recently used one,
925     * up in windowUse[] to nextWindowUseIndex-1
926     */
927
928    /* first, find the index of the window - backwards to favor the more recently used windows */
929    int i, j;
930
931    i=scsu->nextWindowUseIndex;
932    do {
933        if(--i<0) {
934            i=7;
935        }
936    } while(scsu->windowUse[i]!=window);
937
938    /* now copy each windowUse[i+1] to [i] */
939    j=i+1;
940    if(j==8) {
941        j=0;
942    }
943    while(j!=scsu->nextWindowUseIndex) {
944        scsu->windowUse[i]=scsu->windowUse[j];
945        i=j;
946        if(++j==8) { j=0; }
947    }
948
949    /* finally, set the window into the most recently used index */
950    scsu->windowUse[i]=window;
951}
952
953/*
954 * calculate the offset and the code for a dynamic window that contains the character
955 * takes fixed offsets into account
956 * the offset of the window is stored in the offset variable,
957 * the code is returned
958 *
959 * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
960 */
961static int
962getDynamicOffset(uint32_t c, uint32_t *pOffset) {
963    int i;
964
965    for(i=0; i<7; ++i) {
966        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
967            *pOffset=fixedOffsets[i];
968            return 0xf9+i;
969        }
970    }
971
972    if(c<0x80) {
973        /* No dynamic window for US-ASCII. */
974        return -1;
975    } else if(c<0x3400 ||
976              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
977              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
978    ) {
979        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
980        *pOffset=c&0x7fffff80;
981        return (int)(c>>7);
982    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
983        /* For these characters we need to take the gapOffset into account. */
984        *pOffset=c&0x7fffff80;
985        return (int)((c-gapOffset)>>7);
986    } else {
987        return -1;
988    }
989}
990
991/*
992 * Idea for compression:
993 *  - save SCSUData and other state before really starting work
994 *  - at endloop, see if compression could be better with just unicode mode
995 *  - don't do this if a callback has been called
996 *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
997 *  - different buffer handling!
998 *
999 * Drawback or need for corrective handling:
1000 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
1001 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
1002 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1003 *
1004 * How to achieve both?
1005 *  - Only replace the result after an SDX or SCU?
1006 */
1007
1008static void
1009_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1010                            UErrorCode *pErrorCode) {
1011    UConverter *cnv;
1012    SCSUData *scsu;
1013    const UChar *source, *sourceLimit;
1014    uint8_t *target;
1015    int32_t targetCapacity;
1016    int32_t *offsets;
1017
1018    UBool isSingleByteMode;
1019    uint8_t dynamicWindow;
1020    uint32_t currentOffset;
1021
1022    uint32_t c, delta;
1023
1024    int32_t sourceIndex, nextSourceIndex;
1025
1026    int32_t length;
1027
1028    /* variables for compression heuristics */
1029    uint32_t offset;
1030    UChar lead, trail;
1031    int code;
1032    int8_t window;
1033
1034    /* set up the local pointers */
1035    cnv=pArgs->converter;
1036    scsu=(SCSUData *)cnv->extraInfo;
1037
1038    /* set up the local pointers */
1039    source=pArgs->source;
1040    sourceLimit=pArgs->sourceLimit;
1041    target=(uint8_t *)pArgs->target;
1042    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1043    offsets=pArgs->offsets;
1044
1045    /* get the state machine state */
1046    isSingleByteMode=scsu->fromUIsSingleByteMode;
1047    dynamicWindow=scsu->fromUDynamicWindow;
1048    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1049
1050    c=cnv->fromUChar32;
1051
1052    /* sourceIndex=-1 if the current character began in the previous buffer */
1053    sourceIndex= c==0 ? 0 : -1;
1054    nextSourceIndex=0;
1055
1056    /* similar conversion "loop" as in toUnicode */
1057loop:
1058    if(isSingleByteMode) {
1059        if(c!=0 && targetCapacity>0) {
1060            goto getTrailSingle;
1061        }
1062
1063        /* state machine for single-byte mode */
1064/* singleByteMode: */
1065        while(source<sourceLimit) {
1066            if(targetCapacity<=0) {
1067                /* target is full */
1068                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1069                break;
1070            }
1071            c=*source++;
1072            ++nextSourceIndex;
1073
1074            if((c-0x20)<=0x5f) {
1075                /* pass US-ASCII graphic character through */
1076                *target++=(uint8_t)c;
1077                if(offsets!=NULL) {
1078                    *offsets++=sourceIndex;
1079                }
1080                --targetCapacity;
1081            } else if(c<0x20) {
1082                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1083                    /* CR/LF/TAB/NUL */
1084                    *target++=(uint8_t)c;
1085                    if(offsets!=NULL) {
1086                        *offsets++=sourceIndex;
1087                    }
1088                    --targetCapacity;
1089                } else {
1090                    /* quote C0 control character */
1091                    c|=SQ0<<8;
1092                    length=2;
1093                    goto outputBytes;
1094                }
1095            } else if((delta=c-currentOffset)<=0x7f) {
1096                /* use the current dynamic window */
1097                *target++=(uint8_t)(delta|0x80);
1098                if(offsets!=NULL) {
1099                    *offsets++=sourceIndex;
1100                }
1101                --targetCapacity;
1102            } else if(U16_IS_SURROGATE(c)) {
1103                if(U16_IS_SURROGATE_LEAD(c)) {
1104getTrailSingle:
1105                    lead=(UChar)c;
1106                    if(source<sourceLimit) {
1107                        /* test the following code unit */
1108                        trail=*source;
1109                        if(U16_IS_TRAIL(trail)) {
1110                            ++source;
1111                            ++nextSourceIndex;
1112                            c=U16_GET_SUPPLEMENTARY(c, trail);
1113                            /* convert this surrogate code point */
1114                            /* exit this condition tree */
1115                        } else {
1116                            /* this is an unmatched lead code unit (1st surrogate) */
1117                            /* callback(illegal) */
1118                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1119                            goto endloop;
1120                        }
1121                    } else {
1122                        /* no more input */
1123                        break;
1124                    }
1125                } else {
1126                    /* this is an unmatched trail code unit (2nd surrogate) */
1127                    /* callback(illegal) */
1128                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1129                    goto endloop;
1130                }
1131
1132                /* compress supplementary character U+10000..U+10ffff */
1133                if((delta=c-currentOffset)<=0x7f) {
1134                    /* use the current dynamic window */
1135                    *target++=(uint8_t)(delta|0x80);
1136                    if(offsets!=NULL) {
1137                        *offsets++=sourceIndex;
1138                    }
1139                    --targetCapacity;
1140                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1141                    /* there is a dynamic window that contains this character, change to it */
1142                    dynamicWindow=window;
1143                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1144                    useDynamicWindow(scsu, dynamicWindow);
1145                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1146                    length=2;
1147                    goto outputBytes;
1148                } else if((code=getDynamicOffset(c, &offset))>=0) {
1149                    /* might check if there are more characters in this window to come */
1150                    /* define an extended window with this character */
1151                    code-=0x200;
1152                    dynamicWindow=getNextDynamicWindow(scsu);
1153                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1154                    useDynamicWindow(scsu, dynamicWindow);
1155                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1156                    length=4;
1157                    goto outputBytes;
1158                } else {
1159                    /* change to Unicode mode and output this (lead, trail) pair */
1160                    isSingleByteMode=FALSE;
1161                    *target++=(uint8_t)SCU;
1162                    if(offsets!=NULL) {
1163                        *offsets++=sourceIndex;
1164                    }
1165                    --targetCapacity;
1166                    c=((uint32_t)lead<<16)|trail;
1167                    length=4;
1168                    goto outputBytes;
1169                }
1170            } else if(c<0xa0) {
1171                /* quote C1 control character */
1172                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1173                length=2;
1174                goto outputBytes;
1175            } else if(c==0xfeff || c>=0xfff0) {
1176                /* quote signature character=byte order mark and specials */
1177                c|=SQU<<16;
1178                length=3;
1179                goto outputBytes;
1180            } else {
1181                /* compress all other BMP characters */
1182                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1183                    /* there is a window defined that contains this character - switch to it or quote from it? */
1184                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1185                        /* change to dynamic window */
1186                        dynamicWindow=window;
1187                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1188                        useDynamicWindow(scsu, dynamicWindow);
1189                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1190                        length=2;
1191                        goto outputBytes;
1192                    } else {
1193                        /* quote from dynamic window */
1194                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1195                        length=2;
1196                        goto outputBytes;
1197                    }
1198                } else if((window=getWindow(staticOffsets, c))>=0) {
1199                    /* quote from static window */
1200                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1201                    length=2;
1202                    goto outputBytes;
1203                } else if((code=getDynamicOffset(c, &offset))>=0) {
1204                    /* define a dynamic window with this character */
1205                    dynamicWindow=getNextDynamicWindow(scsu);
1206                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1207                    useDynamicWindow(scsu, dynamicWindow);
1208                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1209                    length=3;
1210                    goto outputBytes;
1211                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1212                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1213                ) {
1214                    /*
1215                     * this character is not compressible (a BMP ideograph or similar);
1216                     * switch to Unicode mode if this is the last character in the block
1217                     * or there is at least one more ideograph following immediately
1218                     */
1219                    isSingleByteMode=FALSE;
1220                    c|=SCU<<16;
1221                    length=3;
1222                    goto outputBytes;
1223                } else {
1224                    /* quote Unicode */
1225                    c|=SQU<<16;
1226                    length=3;
1227                    goto outputBytes;
1228                }
1229            }
1230
1231            /* normal end of conversion: prepare for a new character */
1232            c=0;
1233            sourceIndex=nextSourceIndex;
1234        }
1235    } else {
1236        if(c!=0 && targetCapacity>0) {
1237            goto getTrailUnicode;
1238        }
1239
1240        /* state machine for Unicode mode */
1241/* unicodeByteMode: */
1242        while(source<sourceLimit) {
1243            if(targetCapacity<=0) {
1244                /* target is full */
1245                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1246                break;
1247            }
1248            c=*source++;
1249            ++nextSourceIndex;
1250
1251            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1252                /* not compressible, write character directly */
1253                if(targetCapacity>=2) {
1254                    *target++=(uint8_t)(c>>8);
1255                    *target++=(uint8_t)c;
1256                    if(offsets!=NULL) {
1257                        *offsets++=sourceIndex;
1258                        *offsets++=sourceIndex;
1259                    }
1260                    targetCapacity-=2;
1261                } else {
1262                    length=2;
1263                    goto outputBytes;
1264                }
1265            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1266                /* compress BMP character if the following one is not an uncompressible ideograph */
1267                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1268                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1269                        /* ASCII digit or letter */
1270                        isSingleByteMode=TRUE;
1271                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1272                        length=2;
1273                        goto outputBytes;
1274                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1275                        /* there is a dynamic window that contains this character, change to it */
1276                        isSingleByteMode=TRUE;
1277                        dynamicWindow=window;
1278                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1279                        useDynamicWindow(scsu, dynamicWindow);
1280                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1281                        length=2;
1282                        goto outputBytes;
1283                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1284                        /* define a dynamic window with this character */
1285                        isSingleByteMode=TRUE;
1286                        dynamicWindow=getNextDynamicWindow(scsu);
1287                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1288                        useDynamicWindow(scsu, dynamicWindow);
1289                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1290                        length=3;
1291                        goto outputBytes;
1292                    }
1293                }
1294
1295                /* don't know how to compress this character, just write it directly */
1296                length=2;
1297                goto outputBytes;
1298            } else if(c<0xe000) {
1299                /* c is a surrogate */
1300                if(U16_IS_SURROGATE_LEAD(c)) {
1301getTrailUnicode:
1302                    lead=(UChar)c;
1303                    if(source<sourceLimit) {
1304                        /* test the following code unit */
1305                        trail=*source;
1306                        if(U16_IS_TRAIL(trail)) {
1307                            ++source;
1308                            ++nextSourceIndex;
1309                            c=U16_GET_SUPPLEMENTARY(c, trail);
1310                            /* convert this surrogate code point */
1311                            /* exit this condition tree */
1312                        } else {
1313                            /* this is an unmatched lead code unit (1st surrogate) */
1314                            /* callback(illegal) */
1315                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1316                            goto endloop;
1317                        }
1318                    } else {
1319                        /* no more input */
1320                        break;
1321                    }
1322                } else {
1323                    /* this is an unmatched trail code unit (2nd surrogate) */
1324                    /* callback(illegal) */
1325                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1326                    goto endloop;
1327                }
1328
1329                /* compress supplementary character */
1330                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1331                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1332                ) {
1333                    /*
1334                     * there is a dynamic window that contains this character and
1335                     * the following character is not uncompressible,
1336                     * change to the window
1337                     */
1338                    isSingleByteMode=TRUE;
1339                    dynamicWindow=window;
1340                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1341                    useDynamicWindow(scsu, dynamicWindow);
1342                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1343                    length=2;
1344                    goto outputBytes;
1345                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1346                          (code=getDynamicOffset(c, &offset))>=0
1347                ) {
1348                    /* two supplementary characters in (probably) the same window - define an extended one */
1349                    isSingleByteMode=TRUE;
1350                    code-=0x200;
1351                    dynamicWindow=getNextDynamicWindow(scsu);
1352                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1353                    useDynamicWindow(scsu, dynamicWindow);
1354                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1355                    length=4;
1356                    goto outputBytes;
1357                } else {
1358                    /* don't know how to compress this character, just write it directly */
1359                    c=((uint32_t)lead<<16)|trail;
1360                    length=4;
1361                    goto outputBytes;
1362                }
1363            } else /* 0xe000<=c<0xf300 */ {
1364                /* quote to avoid SCSU tags */
1365                c|=UQU<<16;
1366                length=3;
1367                goto outputBytes;
1368            }
1369
1370            /* normal end of conversion: prepare for a new character */
1371            c=0;
1372            sourceIndex=nextSourceIndex;
1373        }
1374    }
1375endloop:
1376
1377    /* set the converter state back into UConverter */
1378    scsu->fromUIsSingleByteMode=isSingleByteMode;
1379    scsu->fromUDynamicWindow=dynamicWindow;
1380
1381    cnv->fromUChar32=c;
1382
1383    /* write back the updated pointers */
1384    pArgs->source=source;
1385    pArgs->target=(char *)target;
1386    pArgs->offsets=offsets;
1387    return;
1388
1389outputBytes:
1390    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1391    /* from the first if in the loop we know that targetCapacity>0 */
1392    if(length<=targetCapacity) {
1393        if(offsets==NULL) {
1394            switch(length) {
1395                /* each branch falls through to the next one */
1396            case 4:
1397                *target++=(uint8_t)(c>>24);
1398            case 3: /*fall through*/
1399                *target++=(uint8_t)(c>>16);
1400            case 2: /*fall through*/
1401                *target++=(uint8_t)(c>>8);
1402            case 1: /*fall through*/
1403                *target++=(uint8_t)c;
1404            default:
1405                /* will never occur */
1406                break;
1407            }
1408        } else {
1409            switch(length) {
1410                /* each branch falls through to the next one */
1411            case 4:
1412                *target++=(uint8_t)(c>>24);
1413                *offsets++=sourceIndex;
1414            case 3: /*fall through*/
1415                *target++=(uint8_t)(c>>16);
1416                *offsets++=sourceIndex;
1417            case 2: /*fall through*/
1418                *target++=(uint8_t)(c>>8);
1419                *offsets++=sourceIndex;
1420            case 1: /*fall through*/
1421                *target++=(uint8_t)c;
1422                *offsets++=sourceIndex;
1423            default:
1424                /* will never occur */
1425                break;
1426            }
1427        }
1428        targetCapacity-=length;
1429
1430        /* normal end of conversion: prepare for a new character */
1431        c=0;
1432        sourceIndex=nextSourceIndex;
1433        goto loop;
1434    } else {
1435        uint8_t *p;
1436
1437        /*
1438         * We actually do this backwards here:
1439         * In order to save an intermediate variable, we output
1440         * first to the overflow buffer what does not fit into the
1441         * regular target.
1442         */
1443        /* we know that 0<=targetCapacity<length<=4 */
1444        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1445        length-=targetCapacity;
1446        p=(uint8_t *)cnv->charErrorBuffer;
1447        switch(length) {
1448            /* each branch falls through to the next one */
1449        case 4:
1450            *p++=(uint8_t)(c>>24);
1451        case 3: /*fall through*/
1452            *p++=(uint8_t)(c>>16);
1453        case 2: /*fall through*/
1454            *p++=(uint8_t)(c>>8);
1455        case 1: /*fall through*/
1456            *p=(uint8_t)c;
1457        default:
1458            /* will never occur */
1459            break;
1460        }
1461        cnv->charErrorBufferLength=(int8_t)length;
1462
1463        /* now output what fits into the regular target */
1464        c>>=8*length; /* length was reduced by targetCapacity */
1465        switch(targetCapacity) {
1466            /* each branch falls through to the next one */
1467        case 3:
1468            *target++=(uint8_t)(c>>16);
1469            if(offsets!=NULL) {
1470                *offsets++=sourceIndex;
1471            }
1472        case 2: /*fall through*/
1473            *target++=(uint8_t)(c>>8);
1474            if(offsets!=NULL) {
1475                *offsets++=sourceIndex;
1476            }
1477        case 1: /*fall through*/
1478            *target++=(uint8_t)c;
1479            if(offsets!=NULL) {
1480                *offsets++=sourceIndex;
1481            }
1482        default:
1483            break;
1484        }
1485
1486        /* target overflow */
1487        targetCapacity=0;
1488        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1489        c=0;
1490        goto endloop;
1491    }
1492}
1493
1494/*
1495 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1496 * If a change is made in the original function, then either
1497 * change this function the same way or
1498 * re-copy the original function and remove the variables
1499 * offsets, sourceIndex, and nextSourceIndex.
1500 */
1501static void
1502_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1503                 UErrorCode *pErrorCode) {
1504    UConverter *cnv;
1505    SCSUData *scsu;
1506    const UChar *source, *sourceLimit;
1507    uint8_t *target;
1508    int32_t targetCapacity;
1509
1510    UBool isSingleByteMode;
1511    uint8_t dynamicWindow;
1512    uint32_t currentOffset;
1513
1514    uint32_t c, delta;
1515
1516    int32_t length;
1517
1518    /* variables for compression heuristics */
1519    uint32_t offset;
1520    UChar lead, trail;
1521    int code;
1522    int8_t window;
1523
1524    /* set up the local pointers */
1525    cnv=pArgs->converter;
1526    scsu=(SCSUData *)cnv->extraInfo;
1527
1528    /* set up the local pointers */
1529    source=pArgs->source;
1530    sourceLimit=pArgs->sourceLimit;
1531    target=(uint8_t *)pArgs->target;
1532    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1533
1534    /* get the state machine state */
1535    isSingleByteMode=scsu->fromUIsSingleByteMode;
1536    dynamicWindow=scsu->fromUDynamicWindow;
1537    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1538
1539    c=cnv->fromUChar32;
1540
1541    /* similar conversion "loop" as in toUnicode */
1542loop:
1543    if(isSingleByteMode) {
1544        if(c!=0 && targetCapacity>0) {
1545            goto getTrailSingle;
1546        }
1547
1548        /* state machine for single-byte mode */
1549/* singleByteMode: */
1550        while(source<sourceLimit) {
1551            if(targetCapacity<=0) {
1552                /* target is full */
1553                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1554                break;
1555            }
1556            c=*source++;
1557
1558            if((c-0x20)<=0x5f) {
1559                /* pass US-ASCII graphic character through */
1560                *target++=(uint8_t)c;
1561                --targetCapacity;
1562            } else if(c<0x20) {
1563                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1564                    /* CR/LF/TAB/NUL */
1565                    *target++=(uint8_t)c;
1566                    --targetCapacity;
1567                } else {
1568                    /* quote C0 control character */
1569                    c|=SQ0<<8;
1570                    length=2;
1571                    goto outputBytes;
1572                }
1573            } else if((delta=c-currentOffset)<=0x7f) {
1574                /* use the current dynamic window */
1575                *target++=(uint8_t)(delta|0x80);
1576                --targetCapacity;
1577            } else if(U16_IS_SURROGATE(c)) {
1578                if(U16_IS_SURROGATE_LEAD(c)) {
1579getTrailSingle:
1580                    lead=(UChar)c;
1581                    if(source<sourceLimit) {
1582                        /* test the following code unit */
1583                        trail=*source;
1584                        if(U16_IS_TRAIL(trail)) {
1585                            ++source;
1586                            c=U16_GET_SUPPLEMENTARY(c, trail);
1587                            /* convert this surrogate code point */
1588                            /* exit this condition tree */
1589                        } else {
1590                            /* this is an unmatched lead code unit (1st surrogate) */
1591                            /* callback(illegal) */
1592                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1593                            goto endloop;
1594                        }
1595                    } else {
1596                        /* no more input */
1597                        break;
1598                    }
1599                } else {
1600                    /* this is an unmatched trail code unit (2nd surrogate) */
1601                    /* callback(illegal) */
1602                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1603                    goto endloop;
1604                }
1605
1606                /* compress supplementary character U+10000..U+10ffff */
1607                if((delta=c-currentOffset)<=0x7f) {
1608                    /* use the current dynamic window */
1609                    *target++=(uint8_t)(delta|0x80);
1610                    --targetCapacity;
1611                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1612                    /* there is a dynamic window that contains this character, change to it */
1613                    dynamicWindow=window;
1614                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1615                    useDynamicWindow(scsu, dynamicWindow);
1616                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1617                    length=2;
1618                    goto outputBytes;
1619                } else if((code=getDynamicOffset(c, &offset))>=0) {
1620                    /* might check if there are more characters in this window to come */
1621                    /* define an extended window with this character */
1622                    code-=0x200;
1623                    dynamicWindow=getNextDynamicWindow(scsu);
1624                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1625                    useDynamicWindow(scsu, dynamicWindow);
1626                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1627                    length=4;
1628                    goto outputBytes;
1629                } else {
1630                    /* change to Unicode mode and output this (lead, trail) pair */
1631                    isSingleByteMode=FALSE;
1632                    *target++=(uint8_t)SCU;
1633                    --targetCapacity;
1634                    c=((uint32_t)lead<<16)|trail;
1635                    length=4;
1636                    goto outputBytes;
1637                }
1638            } else if(c<0xa0) {
1639                /* quote C1 control character */
1640                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1641                length=2;
1642                goto outputBytes;
1643            } else if(c==0xfeff || c>=0xfff0) {
1644                /* quote signature character=byte order mark and specials */
1645                c|=SQU<<16;
1646                length=3;
1647                goto outputBytes;
1648            } else {
1649                /* compress all other BMP characters */
1650                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1651                    /* there is a window defined that contains this character - switch to it or quote from it? */
1652                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1653                        /* change to dynamic window */
1654                        dynamicWindow=window;
1655                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1656                        useDynamicWindow(scsu, dynamicWindow);
1657                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1658                        length=2;
1659                        goto outputBytes;
1660                    } else {
1661                        /* quote from dynamic window */
1662                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1663                        length=2;
1664                        goto outputBytes;
1665                    }
1666                } else if((window=getWindow(staticOffsets, c))>=0) {
1667                    /* quote from static window */
1668                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1669                    length=2;
1670                    goto outputBytes;
1671                } else if((code=getDynamicOffset(c, &offset))>=0) {
1672                    /* define a dynamic window with this character */
1673                    dynamicWindow=getNextDynamicWindow(scsu);
1674                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1675                    useDynamicWindow(scsu, dynamicWindow);
1676                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1677                    length=3;
1678                    goto outputBytes;
1679                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1680                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1681                ) {
1682                    /*
1683                     * this character is not compressible (a BMP ideograph or similar);
1684                     * switch to Unicode mode if this is the last character in the block
1685                     * or there is at least one more ideograph following immediately
1686                     */
1687                    isSingleByteMode=FALSE;
1688                    c|=SCU<<16;
1689                    length=3;
1690                    goto outputBytes;
1691                } else {
1692                    /* quote Unicode */
1693                    c|=SQU<<16;
1694                    length=3;
1695                    goto outputBytes;
1696                }
1697            }
1698
1699            /* normal end of conversion: prepare for a new character */
1700            c=0;
1701        }
1702    } else {
1703        if(c!=0 && targetCapacity>0) {
1704            goto getTrailUnicode;
1705        }
1706
1707        /* state machine for Unicode mode */
1708/* unicodeByteMode: */
1709        while(source<sourceLimit) {
1710            if(targetCapacity<=0) {
1711                /* target is full */
1712                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1713                break;
1714            }
1715            c=*source++;
1716
1717            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1718                /* not compressible, write character directly */
1719                if(targetCapacity>=2) {
1720                    *target++=(uint8_t)(c>>8);
1721                    *target++=(uint8_t)c;
1722                    targetCapacity-=2;
1723                } else {
1724                    length=2;
1725                    goto outputBytes;
1726                }
1727            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1728                /* compress BMP character if the following one is not an uncompressible ideograph */
1729                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1730                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1731                        /* ASCII digit or letter */
1732                        isSingleByteMode=TRUE;
1733                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1734                        length=2;
1735                        goto outputBytes;
1736                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1737                        /* there is a dynamic window that contains this character, change to it */
1738                        isSingleByteMode=TRUE;
1739                        dynamicWindow=window;
1740                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1741                        useDynamicWindow(scsu, dynamicWindow);
1742                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1743                        length=2;
1744                        goto outputBytes;
1745                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1746                        /* define a dynamic window with this character */
1747                        isSingleByteMode=TRUE;
1748                        dynamicWindow=getNextDynamicWindow(scsu);
1749                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1750                        useDynamicWindow(scsu, dynamicWindow);
1751                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1752                        length=3;
1753                        goto outputBytes;
1754                    }
1755                }
1756
1757                /* don't know how to compress this character, just write it directly */
1758                length=2;
1759                goto outputBytes;
1760            } else if(c<0xe000) {
1761                /* c is a surrogate */
1762                if(U16_IS_SURROGATE_LEAD(c)) {
1763getTrailUnicode:
1764                    lead=(UChar)c;
1765                    if(source<sourceLimit) {
1766                        /* test the following code unit */
1767                        trail=*source;
1768                        if(U16_IS_TRAIL(trail)) {
1769                            ++source;
1770                            c=U16_GET_SUPPLEMENTARY(c, trail);
1771                            /* convert this surrogate code point */
1772                            /* exit this condition tree */
1773                        } else {
1774                            /* this is an unmatched lead code unit (1st surrogate) */
1775                            /* callback(illegal) */
1776                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1777                            goto endloop;
1778                        }
1779                    } else {
1780                        /* no more input */
1781                        break;
1782                    }
1783                } else {
1784                    /* this is an unmatched trail code unit (2nd surrogate) */
1785                    /* callback(illegal) */
1786                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1787                    goto endloop;
1788                }
1789
1790                /* compress supplementary character */
1791                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1792                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1793                ) {
1794                    /*
1795                     * there is a dynamic window that contains this character and
1796                     * the following character is not uncompressible,
1797                     * change to the window
1798                     */
1799                    isSingleByteMode=TRUE;
1800                    dynamicWindow=window;
1801                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1802                    useDynamicWindow(scsu, dynamicWindow);
1803                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1804                    length=2;
1805                    goto outputBytes;
1806                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1807                          (code=getDynamicOffset(c, &offset))>=0
1808                ) {
1809                    /* two supplementary characters in (probably) the same window - define an extended one */
1810                    isSingleByteMode=TRUE;
1811                    code-=0x200;
1812                    dynamicWindow=getNextDynamicWindow(scsu);
1813                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1814                    useDynamicWindow(scsu, dynamicWindow);
1815                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1816                    length=4;
1817                    goto outputBytes;
1818                } else {
1819                    /* don't know how to compress this character, just write it directly */
1820                    c=((uint32_t)lead<<16)|trail;
1821                    length=4;
1822                    goto outputBytes;
1823                }
1824            } else /* 0xe000<=c<0xf300 */ {
1825                /* quote to avoid SCSU tags */
1826                c|=UQU<<16;
1827                length=3;
1828                goto outputBytes;
1829            }
1830
1831            /* normal end of conversion: prepare for a new character */
1832            c=0;
1833        }
1834    }
1835endloop:
1836
1837    /* set the converter state back into UConverter */
1838    scsu->fromUIsSingleByteMode=isSingleByteMode;
1839    scsu->fromUDynamicWindow=dynamicWindow;
1840
1841    cnv->fromUChar32=c;
1842
1843    /* write back the updated pointers */
1844    pArgs->source=source;
1845    pArgs->target=(char *)target;
1846    return;
1847
1848outputBytes:
1849    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1850    /* from the first if in the loop we know that targetCapacity>0 */
1851    if(length<=targetCapacity) {
1852        switch(length) {
1853            /* each branch falls through to the next one */
1854        case 4:
1855            *target++=(uint8_t)(c>>24);
1856        case 3: /*fall through*/
1857            *target++=(uint8_t)(c>>16);
1858        case 2: /*fall through*/
1859            *target++=(uint8_t)(c>>8);
1860        case 1: /*fall through*/
1861            *target++=(uint8_t)c;
1862        default:
1863            /* will never occur */
1864            break;
1865        }
1866        targetCapacity-=length;
1867
1868        /* normal end of conversion: prepare for a new character */
1869        c=0;
1870        goto loop;
1871    } else {
1872        uint8_t *p;
1873
1874        /*
1875         * We actually do this backwards here:
1876         * In order to save an intermediate variable, we output
1877         * first to the overflow buffer what does not fit into the
1878         * regular target.
1879         */
1880        /* we know that 0<=targetCapacity<length<=4 */
1881        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1882        length-=targetCapacity;
1883        p=(uint8_t *)cnv->charErrorBuffer;
1884        switch(length) {
1885            /* each branch falls through to the next one */
1886        case 4:
1887            *p++=(uint8_t)(c>>24);
1888        case 3: /*fall through*/
1889            *p++=(uint8_t)(c>>16);
1890        case 2: /*fall through*/
1891            *p++=(uint8_t)(c>>8);
1892        case 1: /*fall through*/
1893            *p=(uint8_t)c;
1894        default:
1895            /* will never occur */
1896            break;
1897        }
1898        cnv->charErrorBufferLength=(int8_t)length;
1899
1900        /* now output what fits into the regular target */
1901        c>>=8*length; /* length was reduced by targetCapacity */
1902        switch(targetCapacity) {
1903            /* each branch falls through to the next one */
1904        case 3:
1905            *target++=(uint8_t)(c>>16);
1906        case 2: /*fall through*/
1907            *target++=(uint8_t)(c>>8);
1908        case 1: /*fall through*/
1909            *target++=(uint8_t)c;
1910        default:
1911            break;
1912        }
1913
1914        /* target overflow */
1915        targetCapacity=0;
1916        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1917        c=0;
1918        goto endloop;
1919    }
1920}
1921
1922/* miscellaneous ------------------------------------------------------------ */
1923
1924static const char *
1925_SCSUGetName(const UConverter *cnv) {
1926    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1927
1928    switch(scsu->locale) {
1929    case l_ja:
1930        return "SCSU,locale=ja";
1931    default:
1932        return "SCSU";
1933    }
1934}
1935
1936/* structure for SafeClone calculations */
1937struct cloneSCSUStruct
1938{
1939    UConverter cnv;
1940    SCSUData mydata;
1941};
1942
1943static UConverter *
1944_SCSUSafeClone(const UConverter *cnv,
1945               void *stackBuffer,
1946               int32_t *pBufferSize,
1947               UErrorCode *status)
1948{
1949    struct cloneSCSUStruct * localClone;
1950    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1951
1952    if (U_FAILURE(*status)){
1953        return 0;
1954    }
1955
1956    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1957        *pBufferSize = bufferSizeNeeded;
1958        return 0;
1959    }
1960
1961    localClone = (struct cloneSCSUStruct *)stackBuffer;
1962    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1963
1964    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1965    localClone->cnv.extraInfo = &localClone->mydata;
1966    localClone->cnv.isExtraLocal = TRUE;
1967
1968    return &localClone->cnv;
1969}
1970
1971
1972static const UConverterImpl _SCSUImpl={
1973    UCNV_SCSU,
1974
1975    NULL,
1976    NULL,
1977
1978    _SCSUOpen,
1979    _SCSUClose,
1980    _SCSUReset,
1981
1982    _SCSUToUnicode,
1983    _SCSUToUnicodeWithOffsets,
1984    _SCSUFromUnicode,
1985    _SCSUFromUnicodeWithOffsets,
1986    NULL,
1987
1988    NULL,
1989    _SCSUGetName,
1990    NULL,
1991    _SCSUSafeClone,
1992    ucnv_getCompleteUnicodeSet
1993};
1994
1995static const UConverterStaticData _SCSUStaticData={
1996    sizeof(UConverterStaticData),
1997    "SCSU",
1998    1212, /* CCSID for SCSU */
1999    UCNV_IBM, UCNV_SCSU,
2000    1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
2001    /*
2002     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
2003     * substitution string.
2004     */
2005    { 0x0e, 0xff, 0xfd, 0 }, 3,
2006    FALSE, FALSE,
2007    0,
2008    0,
2009    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
2010};
2011
2012const UConverterSharedData _SCSUData={
2013    sizeof(UConverterSharedData), ~((uint32_t)0),
2014    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
2015    0
2016};
2017
2018#endif
2019