1/*
2******************************************************************************
3*
4*   Copyright (C) 1998-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  ucnv.c:
10*  Implements APIs for the ICU's codeset conversion library;
11*  mostly calls through internal functions;
12*  created by Bertrand A. Damiba
13*
14* Modification History:
15*
16*   Date        Name        Description
17*   04/04/99    helena      Fixed internal header inclusion.
18*   05/09/00    helena      Added implementation to handle fallback mappings.
19*   06/20/2000  helena      OS/400 port changes; mostly typecast.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_CONVERSION
25
26#include "unicode/ustring.h"
27#include "unicode/ucnv.h"
28#include "unicode/ucnv_err.h"
29#include "unicode/uset.h"
30#include "unicode/utf.h"
31#include "unicode/utf16.h"
32#include "putilimp.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "utracimp.h"
37#include "ustr_imp.h"
38#include "ucnv_imp.h"
39#include "ucnv_cnv.h"
40#include "ucnv_bld.h"
41
/*
 * Size of the intermediate buffer and of the preflighting buffer
 * that ucnv_convert() works with.
 */
#define CHUNK_SIZE 1024
44
45typedef struct UAmbiguousConverter {
46    const char *name;
47    const UChar variant5c;
48} UAmbiguousConverter;
49
50static const UAmbiguousConverter ambiguousConverters[]={
51    { "ibm-897_P100-1995", 0xa5 },
52    { "ibm-942_P120-1999", 0xa5 },
53    { "ibm-943_P130-1999", 0xa5 },
54    { "ibm-946_P100-1995", 0xa5 },
55    { "ibm-33722_P120-1999", 0xa5 },
56    { "ibm-1041_P100-1995", 0xa5 },
57    /*{ "ibm-54191_P100-2006", 0xa5 },*/
58    /*{ "ibm-62383_P100-2007", 0xa5 },*/
59    /*{ "ibm-891_P100-1995", 0x20a9 },*/
60    { "ibm-944_P100-1995", 0x20a9 },
61    { "ibm-949_P110-1999", 0x20a9 },
62    { "ibm-1363_P110-1997", 0x20a9 },
63    { "ISO_2022,locale=ko,version=0", 0x20a9 },
64    { "ibm-1088_P100-1995", 0x20a9 }
65};
66
67/*Calls through createConverter */
68U_CAPI UConverter* U_EXPORT2
69ucnv_open (const char *name,
70                       UErrorCode * err)
71{
72    UConverter *r;
73
74    if (err == NULL || U_FAILURE (*err)) {
75        return NULL;
76    }
77
78    r =  ucnv_createConverter(NULL, name, err);
79    return r;
80}
81
82U_CAPI UConverter* U_EXPORT2
83ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
84{
85    return ucnv_createConverterFromPackage(packageName, converterName,  err);
86}
87
88/*Extracts the UChar* to a char* and calls through createConverter */
89U_CAPI UConverter*   U_EXPORT2
90ucnv_openU (const UChar * name,
91                         UErrorCode * err)
92{
93    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
94
95    if (err == NULL || U_FAILURE(*err))
96        return NULL;
97    if (name == NULL)
98        return ucnv_open (NULL, err);
99    if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
100    {
101        *err = U_ILLEGAL_ARGUMENT_ERROR;
102        return NULL;
103    }
104    return ucnv_open(u_austrcpy(asciiName, name), err);
105}
106
107/* Copy the string that is represented by the UConverterPlatform enum
108 * @param platformString An output buffer
109 * @param platform An enum representing a platform
110 * @return the length of the copied string.
111 */
112static int32_t
113ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
114{
115    switch (pltfrm)
116    {
117    case UCNV_IBM:
118        uprv_strcpy(platformString, "ibm-");
119        return 4;
120    case UCNV_UNKNOWN:
121        break;
122    }
123
124    /* default to empty string */
125    *platformString = 0;
126    return 0;
127}
128
129/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
130 *through createConverter*/
131U_CAPI UConverter*   U_EXPORT2
132ucnv_openCCSID (int32_t codepage,
133                UConverterPlatform platform,
134                UErrorCode * err)
135{
136    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
137    int32_t myNameLen;
138
139    if (err == NULL || U_FAILURE (*err))
140        return NULL;
141
142    /* ucnv_copyPlatformString could return "ibm-" or "cp" */
143    myNameLen = ucnv_copyPlatformString(myName, platform);
144    T_CString_integerToString(myName + myNameLen, codepage, 10);
145
146    return ucnv_createConverter(NULL, myName, err);
147}
148
149/* Creating a temporary stack-based object that can be used in one thread,
150and created from a converter that is shared across threads.
151*/
152
153U_CAPI UConverter* U_EXPORT2
154ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
155{
156    UConverter *localConverter, *allocatedConverter;
157    int32_t bufferSizeNeeded;
158    char *stackBufferChars = (char *)stackBuffer;
159    UErrorCode cbErr;
160    UConverterToUnicodeArgs toUArgs = {
161        sizeof(UConverterToUnicodeArgs),
162            TRUE,
163            NULL,
164            NULL,
165            NULL,
166            NULL,
167            NULL,
168            NULL
169    };
170    UConverterFromUnicodeArgs fromUArgs = {
171        sizeof(UConverterFromUnicodeArgs),
172            TRUE,
173            NULL,
174            NULL,
175            NULL,
176            NULL,
177            NULL,
178            NULL
179    };
180
181    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
182
183    if (status == NULL || U_FAILURE(*status)){
184        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
185        return 0;
186    }
187
188    if (!pBufferSize || !cnv){
189        *status = U_ILLEGAL_ARGUMENT_ERROR;
190        UTRACE_EXIT_STATUS(*status);
191        return 0;
192    }
193
194    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
195                                    ucnv_getName(cnv, status), cnv, stackBuffer);
196
197    if (cnv->sharedData->impl->safeClone != NULL) {
198        /* call the custom safeClone function for sizing */
199        bufferSizeNeeded = 0;
200        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
201    }
202    else
203    {
204        /* inherent sizing */
205        bufferSizeNeeded = sizeof(UConverter);
206    }
207
208    if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
209        *pBufferSize = bufferSizeNeeded;
210        UTRACE_EXIT_VALUE(bufferSizeNeeded);
211        return 0;
212    }
213
214
215    /* Pointers on 64-bit platforms need to be aligned
216     * on a 64-bit boundary in memory.
217     */
218    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
219        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
220        if(*pBufferSize > offsetUp) {
221            *pBufferSize -= offsetUp;
222            stackBufferChars += offsetUp;
223        } else {
224            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
225            *pBufferSize = 1;
226        }
227    }
228
229    stackBuffer = (void *)stackBufferChars;
230
231    /* Now, see if we must allocate any memory */
232    if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
233    {
234        /* allocate one here...*/
235        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
236
237        if(localConverter == NULL) {
238            *status = U_MEMORY_ALLOCATION_ERROR;
239            UTRACE_EXIT_STATUS(*status);
240            return NULL;
241        }
242
243        if (U_SUCCESS(*status)) {
244            *status = U_SAFECLONE_ALLOCATED_WARNING;
245        }
246
247        /* record the fact that memory was allocated */
248        *pBufferSize = bufferSizeNeeded;
249    } else {
250        /* just use the stack buffer */
251        localConverter = (UConverter*) stackBuffer;
252        allocatedConverter = NULL;
253    }
254
255    uprv_memset(localConverter, 0, bufferSizeNeeded);
256
257    /* Copy initial state */
258    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
259    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
260
261    /* copy the substitution string */
262    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
263        localConverter->subChars = (uint8_t *)localConverter->subUChars;
264    } else {
265        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
266        if (localConverter->subChars == NULL) {
267            uprv_free(allocatedConverter);
268            UTRACE_EXIT_STATUS(*status);
269            return NULL;
270        }
271        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
272    }
273
274    /* now either call the safeclone fcn or not */
275    if (cnv->sharedData->impl->safeClone != NULL) {
276        /* call the custom safeClone function */
277        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
278    }
279
280    if(localConverter==NULL || U_FAILURE(*status)) {
281        if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
282            uprv_free(allocatedConverter->subChars);
283        }
284        uprv_free(allocatedConverter);
285        UTRACE_EXIT_STATUS(*status);
286        return NULL;
287    }
288
289    /* increment refcount of shared data if needed */
290    /*
291    Checking whether it's an algorithic converter is okay
292    in multithreaded applications because the value never changes.
293    Don't check referenceCounter for any other value.
294    */
295    if (cnv->sharedData->referenceCounter != ~0) {
296        ucnv_incrementRefCount(cnv->sharedData);
297    }
298
299    if(localConverter == (UConverter*)stackBuffer) {
300        /* we're using user provided data - set to not destroy */
301        localConverter->isCopyLocal = TRUE;
302    }
303
304    /* allow callback functions to handle any memory allocation */
305    toUArgs.converter = fromUArgs.converter = localConverter;
306    cbErr = U_ZERO_ERROR;
307    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
308    cbErr = U_ZERO_ERROR;
309    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
310
311    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
312    return localConverter;
313}
314
315
316
317/*Decreases the reference counter in the shared immutable section of the object
318 *and frees the mutable part*/
319
320U_CAPI void  U_EXPORT2
321ucnv_close (UConverter * converter)
322{
323    UErrorCode errorCode = U_ZERO_ERROR;
324
325    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
326
327    if (converter == NULL)
328    {
329        UTRACE_EXIT();
330        return;
331    }
332
333    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
334        ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
335
336    /* In order to speed up the close, only call the callbacks when they have been changed.
337    This performance check will only work when the callbacks are set within a shared library
338    or from user code that statically links this code. */
339    /* first, notify the callback functions that the converter is closed */
340    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
341        UConverterToUnicodeArgs toUArgs = {
342            sizeof(UConverterToUnicodeArgs),
343                TRUE,
344                NULL,
345                NULL,
346                NULL,
347                NULL,
348                NULL,
349                NULL
350        };
351
352        toUArgs.converter = converter;
353        errorCode = U_ZERO_ERROR;
354        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
355    }
356    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
357        UConverterFromUnicodeArgs fromUArgs = {
358            sizeof(UConverterFromUnicodeArgs),
359                TRUE,
360                NULL,
361                NULL,
362                NULL,
363                NULL,
364                NULL,
365                NULL
366        };
367        fromUArgs.converter = converter;
368        errorCode = U_ZERO_ERROR;
369        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
370    }
371
372    if (converter->sharedData->impl->close != NULL) {
373        converter->sharedData->impl->close(converter);
374    }
375
376    if (converter->subChars != (uint8_t *)converter->subUChars) {
377        uprv_free(converter->subChars);
378    }
379
380    /*
381    Checking whether it's an algorithic converter is okay
382    in multithreaded applications because the value never changes.
383    Don't check referenceCounter for any other value.
384    */
385    if (converter->sharedData->referenceCounter != ~0) {
386        ucnv_unloadSharedDataIfReady(converter->sharedData);
387    }
388
389    if(!converter->isCopyLocal){
390        uprv_free(converter);
391    }
392
393    UTRACE_EXIT();
394}
395
396/*returns a single Name from the list, will return NULL if out of bounds
397 */
398U_CAPI const char*   U_EXPORT2
399ucnv_getAvailableName (int32_t n)
400{
401    if (0 <= n && n <= 0xffff) {
402        UErrorCode err = U_ZERO_ERROR;
403        const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
404        if (U_SUCCESS(err)) {
405            return name;
406        }
407    }
408    return NULL;
409}
410
411U_CAPI int32_t   U_EXPORT2
412ucnv_countAvailable ()
413{
414    UErrorCode err = U_ZERO_ERROR;
415    return ucnv_bld_countAvailableConverters(&err);
416}
417
418U_CAPI void    U_EXPORT2
419ucnv_getSubstChars (const UConverter * converter,
420                    char *mySubChar,
421                    int8_t * len,
422                    UErrorCode * err)
423{
424    if (U_FAILURE (*err))
425        return;
426
427    if (converter->subCharLen <= 0) {
428        /* Unicode string or empty string from ucnv_setSubstString(). */
429        *len = 0;
430        return;
431    }
432
433    if (*len < converter->subCharLen) /*not enough space in subChars */
434    {
435        *err = U_INDEX_OUTOFBOUNDS_ERROR;
436        return;
437    }
438
439    uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
440    *len = converter->subCharLen; /*store # of bytes copied to buffer */
441}
442
443U_CAPI void    U_EXPORT2
444ucnv_setSubstChars (UConverter * converter,
445                    const char *mySubChar,
446                    int8_t len,
447                    UErrorCode * err)
448{
449    if (U_FAILURE (*err))
450        return;
451
452    /*Makes sure that the subChar is within the codepages char length boundaries */
453    if ((len > converter->sharedData->staticData->maxBytesPerChar)
454     || (len < converter->sharedData->staticData->minBytesPerChar))
455    {
456        *err = U_ILLEGAL_ARGUMENT_ERROR;
457        return;
458    }
459
460    uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
461    converter->subCharLen = len;  /*sets the new len */
462
463    /*
464    * There is currently (2001Feb) no separate API to set/get subChar1.
465    * In order to always have subChar written after it is explicitly set,
466    * we set subChar1 to 0.
467    */
468    converter->subChar1 = 0;
469
470    return;
471}
472
473U_CAPI void U_EXPORT2
474ucnv_setSubstString(UConverter *cnv,
475                    const UChar *s,
476                    int32_t length,
477                    UErrorCode *err) {
478    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
479    char chars[UCNV_ERROR_BUFFER_LENGTH];
480
481    UConverter *clone;
482    uint8_t *subChars;
483    int32_t cloneSize, length8;
484
485    /* Let the following functions check all arguments. */
486    cloneSize = sizeof(cloneBuffer);
487    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
488    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
489    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
490    ucnv_close(clone);
491    if (U_FAILURE(*err)) {
492        return;
493    }
494
495    if (cnv->sharedData->impl->writeSub == NULL
496#if !UCONFIG_NO_LEGACY_CONVERSION
497        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
498         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
499#endif
500    ) {
501        /* The converter is not stateful. Store the charset bytes as a fixed string. */
502        subChars = (uint8_t *)chars;
503    } else {
504        /*
505         * The converter has a non-default writeSub() function, indicating
506         * that it is stateful.
507         * Store the Unicode string for on-the-fly conversion for correct
508         * state handling.
509         */
510        if (length > UCNV_ERROR_BUFFER_LENGTH) {
511            /*
512             * Should not occur. The converter should output at least one byte
513             * per UChar, which means that ucnv_fromUChars() should catch all
514             * overflows.
515             */
516            *err = U_BUFFER_OVERFLOW_ERROR;
517            return;
518        }
519        subChars = (uint8_t *)s;
520        if (length < 0) {
521            length = u_strlen(s);
522        }
523        length8 = length * U_SIZEOF_UCHAR;
524    }
525
526    /*
527     * For storing the substitution string, select either the small buffer inside
528     * UConverter or allocate a subChars buffer.
529     */
530    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
531        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
532        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
533            /* Allocate a new buffer for the string. */
534            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
535            if (cnv->subChars == NULL) {
536                cnv->subChars = (uint8_t *)cnv->subUChars;
537                *err = U_MEMORY_ALLOCATION_ERROR;
538                return;
539            }
540            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
541        }
542    }
543
544    /* Copy the substitution string into the UConverter or its subChars buffer. */
545    if (length8 == 0) {
546        cnv->subCharLen = 0;
547    } else {
548        uprv_memcpy(cnv->subChars, subChars, length8);
549        if (subChars == (uint8_t *)chars) {
550            cnv->subCharLen = (int8_t)length8;
551        } else /* subChars == s */ {
552            cnv->subCharLen = (int8_t)-length;
553        }
554    }
555
556    /* See comment in ucnv_setSubstChars(). */
557    cnv->subChar1 = 0;
558}
559
560/*resets the internal states of a converter
561 *goal : have the same behaviour than a freshly created converter
562 */
563static void _reset(UConverter *converter, UConverterResetChoice choice,
564                   UBool callCallback) {
565    if(converter == NULL) {
566        return;
567    }
568
569    if(callCallback) {
570        /* first, notify the callback functions that the converter is reset */
571        UErrorCode errorCode;
572
573        if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
574            UConverterToUnicodeArgs toUArgs = {
575                sizeof(UConverterToUnicodeArgs),
576                TRUE,
577                NULL,
578                NULL,
579                NULL,
580                NULL,
581                NULL,
582                NULL
583            };
584            toUArgs.converter = converter;
585            errorCode = U_ZERO_ERROR;
586            converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
587        }
588        if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
589            UConverterFromUnicodeArgs fromUArgs = {
590                sizeof(UConverterFromUnicodeArgs),
591                TRUE,
592                NULL,
593                NULL,
594                NULL,
595                NULL,
596                NULL,
597                NULL
598            };
599            fromUArgs.converter = converter;
600            errorCode = U_ZERO_ERROR;
601            converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
602        }
603    }
604
605    /* now reset the converter itself */
606    if(choice<=UCNV_RESET_TO_UNICODE) {
607        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
608        converter->mode = 0;
609        converter->toULength = 0;
610        converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
611        converter->preToULength = 0;
612    }
613    if(choice!=UCNV_RESET_TO_UNICODE) {
614        converter->fromUnicodeStatus = 0;
615        converter->fromUChar32 = 0;
616        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
617        converter->preFromUFirstCP = U_SENTINEL;
618        converter->preFromULength = 0;
619    }
620
621    if (converter->sharedData->impl->reset != NULL) {
622        /* call the custom reset function */
623        converter->sharedData->impl->reset(converter, choice);
624    }
625}
626
627U_CAPI void  U_EXPORT2
628ucnv_reset(UConverter *converter)
629{
630    _reset(converter, UCNV_RESET_BOTH, TRUE);
631}
632
633U_CAPI void  U_EXPORT2
634ucnv_resetToUnicode(UConverter *converter)
635{
636    _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
637}
638
639U_CAPI void  U_EXPORT2
640ucnv_resetFromUnicode(UConverter *converter)
641{
642    _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
643}
644
645U_CAPI int8_t   U_EXPORT2
646ucnv_getMaxCharSize (const UConverter * converter)
647{
648    return converter->maxBytesPerUChar;
649}
650
651
652U_CAPI int8_t   U_EXPORT2
653ucnv_getMinCharSize (const UConverter * converter)
654{
655    return converter->sharedData->staticData->minBytesPerChar;
656}
657
658U_CAPI const char*   U_EXPORT2
659ucnv_getName (const UConverter * converter, UErrorCode * err)
660
661{
662    if (U_FAILURE (*err))
663        return NULL;
664    if(converter->sharedData->impl->getName){
665        const char* temp= converter->sharedData->impl->getName(converter);
666        if(temp)
667            return temp;
668    }
669    return converter->sharedData->staticData->name;
670}
671
672U_CAPI int32_t U_EXPORT2
673ucnv_getCCSID(const UConverter * converter,
674              UErrorCode * err)
675{
676    int32_t ccsid;
677    if (U_FAILURE (*err))
678        return -1;
679
680    ccsid = converter->sharedData->staticData->codepage;
681    if (ccsid == 0) {
682        /* Rare case. This is for cases like gb18030,
683        which doesn't have an IBM canonical name, but does have an IBM alias. */
684        const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
685        if (U_SUCCESS(*err) && standardName) {
686            const char *ccsidStr = uprv_strchr(standardName, '-');
687            if (ccsidStr) {
688                ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
689            }
690        }
691    }
692    return ccsid;
693}
694
695
696U_CAPI UConverterPlatform   U_EXPORT2
697ucnv_getPlatform (const UConverter * converter,
698                                      UErrorCode * err)
699{
700    if (U_FAILURE (*err))
701        return UCNV_UNKNOWN;
702
703    return (UConverterPlatform)converter->sharedData->staticData->platform;
704}
705
706U_CAPI void U_EXPORT2
707    ucnv_getToUCallBack (const UConverter * converter,
708                         UConverterToUCallback *action,
709                         const void **context)
710{
711    *action = converter->fromCharErrorBehaviour;
712    *context = converter->toUContext;
713}
714
715U_CAPI void U_EXPORT2
716    ucnv_getFromUCallBack (const UConverter * converter,
717                           UConverterFromUCallback *action,
718                           const void **context)
719{
720    *action = converter->fromUCharErrorBehaviour;
721    *context = converter->fromUContext;
722}
723
724U_CAPI void    U_EXPORT2
725ucnv_setToUCallBack (UConverter * converter,
726                            UConverterToUCallback newAction,
727                            const void* newContext,
728                            UConverterToUCallback *oldAction,
729                            const void** oldContext,
730                            UErrorCode * err)
731{
732    if (U_FAILURE (*err))
733        return;
734    if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
735    converter->fromCharErrorBehaviour = newAction;
736    if (oldContext) *oldContext = converter->toUContext;
737    converter->toUContext = newContext;
738}
739
740U_CAPI void  U_EXPORT2
741ucnv_setFromUCallBack (UConverter * converter,
742                            UConverterFromUCallback newAction,
743                            const void* newContext,
744                            UConverterFromUCallback *oldAction,
745                            const void** oldContext,
746                            UErrorCode * err)
747{
748    if (U_FAILURE (*err))
749        return;
750    if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
751    converter->fromUCharErrorBehaviour = newAction;
752    if (oldContext) *oldContext = converter->fromUContext;
753    converter->fromUContext = newContext;
754}
755
/*
 * Adjusts a run of freshly written offsets in place.
 *
 * @param offsets          the offsets to adjust
 * @param length           number of entries in offsets
 * @param sourceIndex      index of the source position the offsets are
 *                         relative to; a negative value means that the
 *                         conversion function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an
 *                         error, if any; it is subtracted from sourceIndex
 *
 * With shift = sourceIndex - errorInputLength (or -1 for a negative
 * sourceIndex): a shift of 0 leaves the offsets alone, a positive shift is
 * added to every non-negative offset, and a negative shift sets every
 * offset to -1.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift;
    int32_t i;

    shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;

    if (shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if (shift > 0) {
        /* move every valid offset; entries that are already <0 stay as they are */
        for (i = 0; i < length; ++i) {
            if (offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /*
         * either offsets are not handled at all, or the error input
         * sequence started in a previous buffer
         */
        for (i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
800
801/* ucnv_fromUnicode --------------------------------------------------------- */
802
803/*
804 * Implementation note for m:n conversions
805 *
806 * While collecting source units to find the longest match for m:n conversion,
807 * some source units may need to be stored for a partial match.
808 * When a second buffer does not yield a match on all of the previously stored
809 * source units, then they must be "replayed", i.e., fed back into the converter.
810 *
811 * The code relies on the fact that replaying will not nest -
812 * converting a replay buffer will not result in a replay.
813 * This is because a replay is necessary only after the _continuation_ of a
814 * partial match failed, but a replay buffer is converted as a whole.
815 * It may result in some of its units being stored again for a partial match,
816 * but there will not be a continuation _during_ the replay which could fail.
817 *
818 * It is conceivable that a callback function could call the converter
819 * recursively in a way that causes another replay to be stored, but that
820 * would be an error in the callback function.
821 * Such violations will cause assertion failures in a debug build,
822 * and wrong output, but they will not cause a crash.
823 */
824
825static void
826_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
827    UConverterFromUnicode fromUnicode;
828    UConverter *cnv;
829    const UChar *s;
830    char *t;
831    int32_t *offsets;
832    int32_t sourceIndex;
833    int32_t errorInputLength;
834    UBool converterSawEndOfInput, calledCallback;
835
836    /* variables for m:n conversion */
837    UChar replay[UCNV_EXT_MAX_UCHARS];
838    const UChar *realSource, *realSourceLimit;
839    int32_t realSourceIndex;
840    UBool realFlush;
841
842    cnv=pArgs->converter;
843    s=pArgs->source;
844    t=pArgs->target;
845    offsets=pArgs->offsets;
846
847    /* get the converter implementation function */
848    sourceIndex=0;
849    if(offsets==NULL) {
850        fromUnicode=cnv->sharedData->impl->fromUnicode;
851    } else {
852        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
853        if(fromUnicode==NULL) {
854            /* there is no WithOffsets implementation */
855            fromUnicode=cnv->sharedData->impl->fromUnicode;
856            /* we will write -1 for each offset */
857            sourceIndex=-1;
858        }
859    }
860
861    if(cnv->preFromULength>=0) {
862        /* normal mode */
863        realSource=NULL;
864
865        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
866        realSourceLimit=NULL;
867        realFlush=FALSE;
868        realSourceIndex=0;
869    } else {
870        /*
871         * Previous m:n conversion stored source units from a partial match
872         * and failed to consume all of them.
873         * We need to "replay" them from a temporary buffer and convert them first.
874         */
875        realSource=pArgs->source;
876        realSourceLimit=pArgs->sourceLimit;
877        realFlush=pArgs->flush;
878        realSourceIndex=sourceIndex;
879
880        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
881        pArgs->source=replay;
882        pArgs->sourceLimit=replay-cnv->preFromULength;
883        pArgs->flush=FALSE;
884        sourceIndex=-1;
885
886        cnv->preFromULength=0;
887    }
888
889    /*
890     * loop for conversion and error handling
891     *
892     * loop {
893     *   convert
894     *   loop {
895     *     update offsets
896     *     handle end of input
897     *     handle errors/call callback
898     *   }
899     * }
900     */
901    for(;;) {
902        if(U_SUCCESS(*err)) {
903            /* convert */
904            fromUnicode(pArgs, err);
905
906            /*
907             * set a flag for whether the converter
908             * successfully processed the end of the input
909             *
910             * need not check cnv->preFromULength==0 because a replay (<0) will cause
911             * s<sourceLimit before converterSawEndOfInput is checked
912             */
913            converterSawEndOfInput=
914                (UBool)(U_SUCCESS(*err) &&
915                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
916                        cnv->fromUChar32==0);
917        } else {
918            /* handle error from ucnv_convertEx() */
919            converterSawEndOfInput=FALSE;
920        }
921
922        /* no callback called yet for this iteration */
923        calledCallback=FALSE;
924
925        /* no sourceIndex adjustment for conversion, only for callback output */
926        errorInputLength=0;
927
928        /*
929         * loop for offsets and error handling
930         *
931         * iterates at most 3 times:
932         * 1. to clean up after the conversion function
933         * 2. after the callback
934         * 3. after the callback again if there was truncated input
935         */
936        for(;;) {
937            /* update offsets if we write any */
938            if(offsets!=NULL) {
939                int32_t length=(int32_t)(pArgs->target-t);
940                if(length>0) {
941                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
942
943                    /*
944                     * if a converter handles offsets and updates the offsets
945                     * pointer at the end, then pArgs->offset should not change
946                     * here;
947                     * however, some converters do not handle offsets at all
948                     * (sourceIndex<0) or may not update the offsets pointer
949                     */
950                    pArgs->offsets=offsets+=length;
951                }
952
953                if(sourceIndex>=0) {
954                    sourceIndex+=(int32_t)(pArgs->source-s);
955                }
956            }
957
958            if(cnv->preFromULength<0) {
959                /*
960                 * switch the source to new replay units (cannot occur while replaying)
961                 * after offset handling and before end-of-input and callback handling
962                 */
963                if(realSource==NULL) {
964                    realSource=pArgs->source;
965                    realSourceLimit=pArgs->sourceLimit;
966                    realFlush=pArgs->flush;
967                    realSourceIndex=sourceIndex;
968
969                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
970                    pArgs->source=replay;
971                    pArgs->sourceLimit=replay-cnv->preFromULength;
972                    pArgs->flush=FALSE;
973                    if((sourceIndex+=cnv->preFromULength)<0) {
974                        sourceIndex=-1;
975                    }
976
977                    cnv->preFromULength=0;
978                } else {
979                    /* see implementation note before _fromUnicodeWithCallback() */
980                    U_ASSERT(realSource==NULL);
981                    *err=U_INTERNAL_PROGRAM_ERROR;
982                }
983            }
984
985            /* update pointers */
986            s=pArgs->source;
987            t=pArgs->target;
988
989            if(U_SUCCESS(*err)) {
990                if(s<pArgs->sourceLimit) {
991                    /*
992                     * continue with the conversion loop while there is still input left
993                     * (continue converting by breaking out of only the inner loop)
994                     */
995                    break;
996                } else if(realSource!=NULL) {
997                    /* switch back from replaying to the real source and continue */
998                    pArgs->source=realSource;
999                    pArgs->sourceLimit=realSourceLimit;
1000                    pArgs->flush=realFlush;
1001                    sourceIndex=realSourceIndex;
1002
1003                    realSource=NULL;
1004                    break;
1005                } else if(pArgs->flush && cnv->fromUChar32!=0) {
1006                    /*
1007                     * the entire input stream is consumed
1008                     * and there is a partial, truncated input sequence left
1009                     */
1010
1011                    /* inject an error and continue with callback handling */
1012                    *err=U_TRUNCATED_CHAR_FOUND;
1013                    calledCallback=FALSE; /* new error condition */
1014                } else {
1015                    /* input consumed */
1016                    if(pArgs->flush) {
1017                        /*
1018                         * return to the conversion loop once more if the flush
1019                         * flag is set and the conversion function has not
1020                         * successfully processed the end of the input yet
1021                         *
1022                         * (continue converting by breaking out of only the inner loop)
1023                         */
1024                        if(!converterSawEndOfInput) {
1025                            break;
1026                        }
1027
1028                        /* reset the converter without calling the callback function */
1029                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1030                    }
1031
1032                    /* done successfully */
1033                    return;
1034                }
1035            }
1036
1037            /* U_FAILURE(*err) */
1038            {
1039                UErrorCode e;
1040
1041                if( calledCallback ||
1042                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1043                    (e!=U_INVALID_CHAR_FOUND &&
1044                     e!=U_ILLEGAL_CHAR_FOUND &&
1045                     e!=U_TRUNCATED_CHAR_FOUND)
1046                ) {
1047                    /*
1048                     * the callback did not or cannot resolve the error:
1049                     * set output pointers and return
1050                     *
1051                     * the check for buffer overflow is redundant but it is
1052                     * a high-runner case and hopefully documents the intent
1053                     * well
1054                     *
1055                     * if we were replaying, then the replay buffer must be
1056                     * copied back into the UConverter
1057                     * and the real arguments must be restored
1058                     */
1059                    if(realSource!=NULL) {
1060                        int32_t length;
1061
1062                        U_ASSERT(cnv->preFromULength==0);
1063
1064                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1065                        if(length>0) {
1066                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
1067                            cnv->preFromULength=(int8_t)-length;
1068                        }
1069
1070                        pArgs->source=realSource;
1071                        pArgs->sourceLimit=realSourceLimit;
1072                        pArgs->flush=realFlush;
1073                    }
1074
1075                    return;
1076                }
1077            }
1078
1079            /* callback handling */
1080            {
1081                UChar32 codePoint;
1082
1083                /* get and write the code point */
1084                codePoint=cnv->fromUChar32;
1085                errorInputLength=0;
1086                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1087                cnv->invalidUCharLength=(int8_t)errorInputLength;
1088
1089                /* set the converter state to deal with the next character */
1090                cnv->fromUChar32=0;
1091
1092                /* call the callback function */
1093                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1094                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
1095                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1096                    err);
1097            }
1098
1099            /*
1100             * loop back to the offset handling
1101             *
1102             * this flag will indicate after offset handling
1103             * that a callback was called;
1104             * if the callback did not resolve the error, then we return
1105             */
1106            calledCallback=TRUE;
1107        }
1108    }
1109}
1110
1111/*
1112 * Output the fromUnicode overflow buffer.
1113 * Call this function if(cnv->charErrorBufferLength>0).
1114 * @return TRUE if overflow
1115 */
1116static UBool
1117ucnv_outputOverflowFromUnicode(UConverter *cnv,
1118                               char **target, const char *targetLimit,
1119                               int32_t **pOffsets,
1120                               UErrorCode *err) {
1121    int32_t *offsets;
1122    char *overflow, *t;
1123    int32_t i, length;
1124
1125    t=*target;
1126    if(pOffsets!=NULL) {
1127        offsets=*pOffsets;
1128    } else {
1129        offsets=NULL;
1130    }
1131
1132    overflow=(char *)cnv->charErrorBuffer;
1133    length=cnv->charErrorBufferLength;
1134    i=0;
1135    while(i<length) {
1136        if(t==targetLimit) {
1137            /* the overflow buffer contains too much, keep the rest */
1138            int32_t j=0;
1139
1140            do {
1141                overflow[j++]=overflow[i++];
1142            } while(i<length);
1143
1144            cnv->charErrorBufferLength=(int8_t)j;
1145            *target=t;
1146            if(offsets!=NULL) {
1147                *pOffsets=offsets;
1148            }
1149            *err=U_BUFFER_OVERFLOW_ERROR;
1150            return TRUE;
1151        }
1152
1153        /* copy the overflow contents to the target */
1154        *t++=overflow[i++];
1155        if(offsets!=NULL) {
1156            *offsets++=-1; /* no source index available for old output */
1157        }
1158    }
1159
1160    /* the overflow buffer is completely copied to the target */
1161    cnv->charErrorBufferLength=0;
1162    *target=t;
1163    if(offsets!=NULL) {
1164        *pOffsets=offsets;
1165    }
1166    return FALSE;
1167}
1168
1169U_CAPI void U_EXPORT2
1170ucnv_fromUnicode(UConverter *cnv,
1171                 char **target, const char *targetLimit,
1172                 const UChar **source, const UChar *sourceLimit,
1173                 int32_t *offsets,
1174                 UBool flush,
1175                 UErrorCode *err) {
1176    UConverterFromUnicodeArgs args;
1177    const UChar *s;
1178    char *t;
1179
1180    /* check parameters */
1181    if(err==NULL || U_FAILURE(*err)) {
1182        return;
1183    }
1184
1185    if(cnv==NULL || target==NULL || source==NULL) {
1186        *err=U_ILLEGAL_ARGUMENT_ERROR;
1187        return;
1188    }
1189
1190    s=*source;
1191    t=*target;
1192
1193    if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1194        /*
1195        Prevent code from going into an infinite loop in case we do hit this
1196        limit. The limit pointer is expected to be on a UChar * boundary.
1197        This also prevents the next argument check from failing.
1198        */
1199        sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
1200    }
1201
1202    /*
1203     * All these conditions should never happen.
1204     *
1205     * 1) Make sure that the limits are >= to the address source or target
1206     *
1207     * 2) Make sure that the buffer sizes do not exceed the number range for
1208     * int32_t because some functions use the size (in units or bytes)
1209     * rather than comparing pointers, and because offsets are int32_t values.
1210     *
1211     * size_t is guaranteed to be unsigned and large enough for the job.
1212     *
1213     * Return with an error instead of adjusting the limits because we would
1214     * not be able to maintain the semantics that either the source must be
1215     * consumed or the target filled (unless an error occurs).
1216     * An adjustment would be targetLimit=t+0x7fffffff; for example.
1217     *
1218     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1219     * to a char * pointer and provide an incomplete UChar code unit.
1220     */
1221    if (sourceLimit<s || targetLimit<t ||
1222        ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1223        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
1224        (((const char *)sourceLimit-(const char *)s) & 1) != 0)
1225    {
1226        *err=U_ILLEGAL_ARGUMENT_ERROR;
1227        return;
1228    }
1229
1230    /* output the target overflow buffer */
1231    if( cnv->charErrorBufferLength>0 &&
1232        ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1233    ) {
1234        /* U_BUFFER_OVERFLOW_ERROR */
1235        return;
1236    }
1237    /* *target may have moved, therefore stop using t */
1238
1239    if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1240        /* the overflow buffer is emptied and there is no new input: we are done */
1241        return;
1242    }
1243
1244    /*
1245     * Do not simply return with a buffer overflow error if
1246     * !flush && t==targetLimit
1247     * because it is possible that the source will not generate any output.
1248     * For example, the skip callback may be called;
1249     * it does not output anything.
1250     */
1251
1252    /* prepare the converter arguments */
1253    args.converter=cnv;
1254    args.flush=flush;
1255    args.offsets=offsets;
1256    args.source=s;
1257    args.sourceLimit=sourceLimit;
1258    args.target=*target;
1259    args.targetLimit=targetLimit;
1260    args.size=sizeof(args);
1261
1262    _fromUnicodeWithCallback(&args, err);
1263
1264    *source=args.source;
1265    *target=args.target;
1266}
1267
1268/* ucnv_toUnicode() --------------------------------------------------------- */
1269
1270static void
1271_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1272    UConverterToUnicode toUnicode;
1273    UConverter *cnv;
1274    const char *s;
1275    UChar *t;
1276    int32_t *offsets;
1277    int32_t sourceIndex;
1278    int32_t errorInputLength;
1279    UBool converterSawEndOfInput, calledCallback;
1280
1281    /* variables for m:n conversion */
1282    char replay[UCNV_EXT_MAX_BYTES];
1283    const char *realSource, *realSourceLimit;
1284    int32_t realSourceIndex;
1285    UBool realFlush;
1286
1287    cnv=pArgs->converter;
1288    s=pArgs->source;
1289    t=pArgs->target;
1290    offsets=pArgs->offsets;
1291
1292    /* get the converter implementation function */
1293    sourceIndex=0;
1294    if(offsets==NULL) {
1295        toUnicode=cnv->sharedData->impl->toUnicode;
1296    } else {
1297        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1298        if(toUnicode==NULL) {
1299            /* there is no WithOffsets implementation */
1300            toUnicode=cnv->sharedData->impl->toUnicode;
1301            /* we will write -1 for each offset */
1302            sourceIndex=-1;
1303        }
1304    }
1305
1306    if(cnv->preToULength>=0) {
1307        /* normal mode */
1308        realSource=NULL;
1309
1310        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1311        realSourceLimit=NULL;
1312        realFlush=FALSE;
1313        realSourceIndex=0;
1314    } else {
1315        /*
1316         * Previous m:n conversion stored source units from a partial match
1317         * and failed to consume all of them.
1318         * We need to "replay" them from a temporary buffer and convert them first.
1319         */
1320        realSource=pArgs->source;
1321        realSourceLimit=pArgs->sourceLimit;
1322        realFlush=pArgs->flush;
1323        realSourceIndex=sourceIndex;
1324
1325        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1326        pArgs->source=replay;
1327        pArgs->sourceLimit=replay-cnv->preToULength;
1328        pArgs->flush=FALSE;
1329        sourceIndex=-1;
1330
1331        cnv->preToULength=0;
1332    }
1333
1334    /*
1335     * loop for conversion and error handling
1336     *
1337     * loop {
1338     *   convert
1339     *   loop {
1340     *     update offsets
1341     *     handle end of input
1342     *     handle errors/call callback
1343     *   }
1344     * }
1345     */
1346    for(;;) {
1347        if(U_SUCCESS(*err)) {
1348            /* convert */
1349            toUnicode(pArgs, err);
1350
1351            /*
1352             * set a flag for whether the converter
1353             * successfully processed the end of the input
1354             *
1355             * need not check cnv->preToULength==0 because a replay (<0) will cause
1356             * s<sourceLimit before converterSawEndOfInput is checked
1357             */
1358            converterSawEndOfInput=
1359                (UBool)(U_SUCCESS(*err) &&
1360                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1361                        cnv->toULength==0);
1362        } else {
1363            /* handle error from getNextUChar() or ucnv_convertEx() */
1364            converterSawEndOfInput=FALSE;
1365        }
1366
1367        /* no callback called yet for this iteration */
1368        calledCallback=FALSE;
1369
1370        /* no sourceIndex adjustment for conversion, only for callback output */
1371        errorInputLength=0;
1372
1373        /*
1374         * loop for offsets and error handling
1375         *
1376         * iterates at most 3 times:
1377         * 1. to clean up after the conversion function
1378         * 2. after the callback
1379         * 3. after the callback again if there was truncated input
1380         */
1381        for(;;) {
1382            /* update offsets if we write any */
1383            if(offsets!=NULL) {
1384                int32_t length=(int32_t)(pArgs->target-t);
1385                if(length>0) {
1386                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1387
1388                    /*
1389                     * if a converter handles offsets and updates the offsets
1390                     * pointer at the end, then pArgs->offset should not change
1391                     * here;
1392                     * however, some converters do not handle offsets at all
1393                     * (sourceIndex<0) or may not update the offsets pointer
1394                     */
1395                    pArgs->offsets=offsets+=length;
1396                }
1397
1398                if(sourceIndex>=0) {
1399                    sourceIndex+=(int32_t)(pArgs->source-s);
1400                }
1401            }
1402
1403            if(cnv->preToULength<0) {
1404                /*
1405                 * switch the source to new replay units (cannot occur while replaying)
1406                 * after offset handling and before end-of-input and callback handling
1407                 */
1408                if(realSource==NULL) {
1409                    realSource=pArgs->source;
1410                    realSourceLimit=pArgs->sourceLimit;
1411                    realFlush=pArgs->flush;
1412                    realSourceIndex=sourceIndex;
1413
1414                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1415                    pArgs->source=replay;
1416                    pArgs->sourceLimit=replay-cnv->preToULength;
1417                    pArgs->flush=FALSE;
1418                    if((sourceIndex+=cnv->preToULength)<0) {
1419                        sourceIndex=-1;
1420                    }
1421
1422                    cnv->preToULength=0;
1423                } else {
1424                    /* see implementation note before _fromUnicodeWithCallback() */
1425                    U_ASSERT(realSource==NULL);
1426                    *err=U_INTERNAL_PROGRAM_ERROR;
1427                }
1428            }
1429
1430            /* update pointers */
1431            s=pArgs->source;
1432            t=pArgs->target;
1433
1434            if(U_SUCCESS(*err)) {
1435                if(s<pArgs->sourceLimit) {
1436                    /*
1437                     * continue with the conversion loop while there is still input left
1438                     * (continue converting by breaking out of only the inner loop)
1439                     */
1440                    break;
1441                } else if(realSource!=NULL) {
1442                    /* switch back from replaying to the real source and continue */
1443                    pArgs->source=realSource;
1444                    pArgs->sourceLimit=realSourceLimit;
1445                    pArgs->flush=realFlush;
1446                    sourceIndex=realSourceIndex;
1447
1448                    realSource=NULL;
1449                    break;
1450                } else if(pArgs->flush && cnv->toULength>0) {
1451                    /*
1452                     * the entire input stream is consumed
1453                     * and there is a partial, truncated input sequence left
1454                     */
1455
1456                    /* inject an error and continue with callback handling */
1457                    *err=U_TRUNCATED_CHAR_FOUND;
1458                    calledCallback=FALSE; /* new error condition */
1459                } else {
1460                    /* input consumed */
1461                    if(pArgs->flush) {
1462                        /*
1463                         * return to the conversion loop once more if the flush
1464                         * flag is set and the conversion function has not
1465                         * successfully processed the end of the input yet
1466                         *
1467                         * (continue converting by breaking out of only the inner loop)
1468                         */
1469                        if(!converterSawEndOfInput) {
1470                            break;
1471                        }
1472
1473                        /* reset the converter without calling the callback function */
1474                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1475                    }
1476
1477                    /* done successfully */
1478                    return;
1479                }
1480            }
1481
1482            /* U_FAILURE(*err) */
1483            {
1484                UErrorCode e;
1485
1486                if( calledCallback ||
1487                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1488                    (e!=U_INVALID_CHAR_FOUND &&
1489                     e!=U_ILLEGAL_CHAR_FOUND &&
1490                     e!=U_TRUNCATED_CHAR_FOUND &&
1491                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1492                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1493                ) {
1494                    /*
1495                     * the callback did not or cannot resolve the error:
1496                     * set output pointers and return
1497                     *
1498                     * the check for buffer overflow is redundant but it is
1499                     * a high-runner case and hopefully documents the intent
1500                     * well
1501                     *
1502                     * if we were replaying, then the replay buffer must be
1503                     * copied back into the UConverter
1504                     * and the real arguments must be restored
1505                     */
1506                    if(realSource!=NULL) {
1507                        int32_t length;
1508
1509                        U_ASSERT(cnv->preToULength==0);
1510
1511                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1512                        if(length>0) {
1513                            uprv_memcpy(cnv->preToU, pArgs->source, length);
1514                            cnv->preToULength=(int8_t)-length;
1515                        }
1516
1517                        pArgs->source=realSource;
1518                        pArgs->sourceLimit=realSourceLimit;
1519                        pArgs->flush=realFlush;
1520                    }
1521
1522                    return;
1523                }
1524            }
1525
1526            /* copy toUBytes[] to invalidCharBuffer[] */
1527            errorInputLength=cnv->invalidCharLength=cnv->toULength;
1528            if(errorInputLength>0) {
1529                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1530            }
1531
1532            /* set the converter state to deal with the next character */
1533            cnv->toULength=0;
1534
1535            /* call the callback function */
1536            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
1537                cnv->toUCallbackReason = UCNV_UNASSIGNED;
1538            }
1539            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1540                cnv->invalidCharBuffer, errorInputLength,
1541                cnv->toUCallbackReason,
1542                err);
1543            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
1544
1545            /*
1546             * loop back to the offset handling
1547             *
1548             * this flag will indicate after offset handling
1549             * that a callback was called;
1550             * if the callback did not resolve the error, then we return
1551             */
1552            calledCallback=TRUE;
1553        }
1554    }
1555}
1556
1557/*
1558 * Output the toUnicode overflow buffer.
1559 * Call this function if(cnv->UCharErrorBufferLength>0).
1560 * @return TRUE if overflow
1561 */
1562static UBool
1563ucnv_outputOverflowToUnicode(UConverter *cnv,
1564                             UChar **target, const UChar *targetLimit,
1565                             int32_t **pOffsets,
1566                             UErrorCode *err) {
1567    int32_t *offsets;
1568    UChar *overflow, *t;
1569    int32_t i, length;
1570
1571    t=*target;
1572    if(pOffsets!=NULL) {
1573        offsets=*pOffsets;
1574    } else {
1575        offsets=NULL;
1576    }
1577
1578    overflow=cnv->UCharErrorBuffer;
1579    length=cnv->UCharErrorBufferLength;
1580    i=0;
1581    while(i<length) {
1582        if(t==targetLimit) {
1583            /* the overflow buffer contains too much, keep the rest */
1584            int32_t j=0;
1585
1586            do {
1587                overflow[j++]=overflow[i++];
1588            } while(i<length);
1589
1590            cnv->UCharErrorBufferLength=(int8_t)j;
1591            *target=t;
1592            if(offsets!=NULL) {
1593                *pOffsets=offsets;
1594            }
1595            *err=U_BUFFER_OVERFLOW_ERROR;
1596            return TRUE;
1597        }
1598
1599        /* copy the overflow contents to the target */
1600        *t++=overflow[i++];
1601        if(offsets!=NULL) {
1602            *offsets++=-1; /* no source index available for old output */
1603        }
1604    }
1605
1606    /* the overflow buffer is completely copied to the target */
1607    cnv->UCharErrorBufferLength=0;
1608    *target=t;
1609    if(offsets!=NULL) {
1610        *pOffsets=offsets;
1611    }
1612    return FALSE;
1613}
1614
1615U_CAPI void U_EXPORT2
1616ucnv_toUnicode(UConverter *cnv,
1617               UChar **target, const UChar *targetLimit,
1618               const char **source, const char *sourceLimit,
1619               int32_t *offsets,
1620               UBool flush,
1621               UErrorCode *err) {
1622    UConverterToUnicodeArgs args;
1623    const char *s;
1624    UChar *t;
1625
1626    /* check parameters */
1627    if(err==NULL || U_FAILURE(*err)) {
1628        return;
1629    }
1630
1631    if(cnv==NULL || target==NULL || source==NULL) {
1632        *err=U_ILLEGAL_ARGUMENT_ERROR;
1633        return;
1634    }
1635
1636    s=*source;
1637    t=*target;
1638
1639    if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1640        /*
1641        Prevent code from going into an infinite loop in case we do hit this
1642        limit. The limit pointer is expected to be on a UChar * boundary.
1643        This also prevents the next argument check from failing.
1644        */
1645        targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
1646    }
1647
1648    /*
1649     * All these conditions should never happen.
1650     *
1651     * 1) Make sure that the limits are >= to the address source or target
1652     *
1653     * 2) Make sure that the buffer sizes do not exceed the number range for
1654     * int32_t because some functions use the size (in units or bytes)
1655     * rather than comparing pointers, and because offsets are int32_t values.
1656     *
1657     * size_t is guaranteed to be unsigned and large enough for the job.
1658     *
1659     * Return with an error instead of adjusting the limits because we would
1660     * not be able to maintain the semantics that either the source must be
1661     * consumed or the target filled (unless an error occurs).
1662     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1663     *
1664     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1665     * to a char * pointer and provide an incomplete UChar code unit.
1666     */
1667    if (sourceLimit<s || targetLimit<t ||
1668        ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1669        ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
1670        (((const char *)targetLimit-(const char *)t) & 1) != 0
1671    ) {
1672        *err=U_ILLEGAL_ARGUMENT_ERROR;
1673        return;
1674    }
1675
1676    /* output the target overflow buffer */
1677    if( cnv->UCharErrorBufferLength>0 &&
1678        ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1679    ) {
1680        /* U_BUFFER_OVERFLOW_ERROR */
1681        return;
1682    }
1683    /* *target may have moved, therefore stop using t */
1684
1685    if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1686        /* the overflow buffer is emptied and there is no new input: we are done */
1687        return;
1688    }
1689
1690    /*
1691     * Do not simply return with a buffer overflow error if
1692     * !flush && t==targetLimit
1693     * because it is possible that the source will not generate any output.
1694     * For example, the skip callback may be called;
1695     * it does not output anything.
1696     */
1697
1698    /* prepare the converter arguments */
1699    args.converter=cnv;
1700    args.flush=flush;
1701    args.offsets=offsets;
1702    args.source=s;
1703    args.sourceLimit=sourceLimit;
1704    args.target=*target;
1705    args.targetLimit=targetLimit;
1706    args.size=sizeof(args);
1707
1708    _toUnicodeWithCallback(&args, err);
1709
1710    *source=args.source;
1711    *target=args.target;
1712}
1713
1714/* ucnv_to/fromUChars() ----------------------------------------------------- */
1715
1716U_CAPI int32_t U_EXPORT2
1717ucnv_fromUChars(UConverter *cnv,
1718                char *dest, int32_t destCapacity,
1719                const UChar *src, int32_t srcLength,
1720                UErrorCode *pErrorCode) {
1721    const UChar *srcLimit;
1722    char *originalDest, *destLimit;
1723    int32_t destLength;
1724
1725    /* check arguments */
1726    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1727        return 0;
1728    }
1729
1730    if( cnv==NULL ||
1731        destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1732        srcLength<-1 || (srcLength!=0 && src==NULL)
1733    ) {
1734        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1735        return 0;
1736    }
1737
1738    /* initialize */
1739    ucnv_resetFromUnicode(cnv);
1740    originalDest=dest;
1741    if(srcLength==-1) {
1742        srcLength=u_strlen(src);
1743    }
1744    if(srcLength>0) {
1745        srcLimit=src+srcLength;
1746        destLimit=dest+destCapacity;
1747
1748        /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1749        if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1750            destLimit=(char *)U_MAX_PTR(dest);
1751        }
1752
1753        /* perform the conversion */
1754        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1755        destLength=(int32_t)(dest-originalDest);
1756
1757        /* if an overflow occurs, then get the preflighting length */
1758        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1759            char buffer[1024];
1760
1761            destLimit=buffer+sizeof(buffer);
1762            do {
1763                dest=buffer;
1764                *pErrorCode=U_ZERO_ERROR;
1765                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1766                destLength+=(int32_t)(dest-buffer);
1767            } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1768        }
1769    } else {
1770        destLength=0;
1771    }
1772
1773    return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1774}
1775
1776U_CAPI int32_t U_EXPORT2
1777ucnv_toUChars(UConverter *cnv,
1778              UChar *dest, int32_t destCapacity,
1779              const char *src, int32_t srcLength,
1780              UErrorCode *pErrorCode) {
1781    const char *srcLimit;
1782    UChar *originalDest, *destLimit;
1783    int32_t destLength;
1784
1785    /* check arguments */
1786    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1787        return 0;
1788    }
1789
1790    if( cnv==NULL ||
1791        destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1792        srcLength<-1 || (srcLength!=0 && src==NULL))
1793    {
1794        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1795        return 0;
1796    }
1797
1798    /* initialize */
1799    ucnv_resetToUnicode(cnv);
1800    originalDest=dest;
1801    if(srcLength==-1) {
1802        srcLength=(int32_t)uprv_strlen(src);
1803    }
1804    if(srcLength>0) {
1805        srcLimit=src+srcLength;
1806        destLimit=dest+destCapacity;
1807
1808        /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1809        if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1810            destLimit=(UChar *)U_MAX_PTR(dest);
1811        }
1812
1813        /* perform the conversion */
1814        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1815        destLength=(int32_t)(dest-originalDest);
1816
1817        /* if an overflow occurs, then get the preflighting length */
1818        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1819        {
1820            UChar buffer[1024];
1821
1822            destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1823            do {
1824                dest=buffer;
1825                *pErrorCode=U_ZERO_ERROR;
1826                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1827                destLength+=(int32_t)(dest-buffer);
1828            }
1829            while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1830        }
1831    } else {
1832        destLength=0;
1833    }
1834
1835    return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1836}
1837
/* ucnv_getNextUChar() ------------------------------------------------------ */
1839
1840U_CAPI UChar32 U_EXPORT2
1841ucnv_getNextUChar(UConverter *cnv,
1842                  const char **source, const char *sourceLimit,
1843                  UErrorCode *err) {
1844    UConverterToUnicodeArgs args;
1845    UChar buffer[U16_MAX_LENGTH];
1846    const char *s;
1847    UChar32 c;
1848    int32_t i, length;
1849
1850    /* check parameters */
1851    if(err==NULL || U_FAILURE(*err)) {
1852        return 0xffff;
1853    }
1854
1855    if(cnv==NULL || source==NULL) {
1856        *err=U_ILLEGAL_ARGUMENT_ERROR;
1857        return 0xffff;
1858    }
1859
1860    s=*source;
1861    if(sourceLimit<s) {
1862        *err=U_ILLEGAL_ARGUMENT_ERROR;
1863        return 0xffff;
1864    }
1865
1866    /*
1867     * Make sure that the buffer sizes do not exceed the number range for
1868     * int32_t because some functions use the size (in units or bytes)
1869     * rather than comparing pointers, and because offsets are int32_t values.
1870     *
1871     * size_t is guaranteed to be unsigned and large enough for the job.
1872     *
1873     * Return with an error instead of adjusting the limits because we would
1874     * not be able to maintain the semantics that either the source must be
1875     * consumed or the target filled (unless an error occurs).
1876     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1877     */
1878    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1879        *err=U_ILLEGAL_ARGUMENT_ERROR;
1880        return 0xffff;
1881    }
1882
1883    c=U_SENTINEL;
1884
1885    /* flush the target overflow buffer */
1886    if(cnv->UCharErrorBufferLength>0) {
1887        UChar *overflow;
1888
1889        overflow=cnv->UCharErrorBuffer;
1890        i=0;
1891        length=cnv->UCharErrorBufferLength;
1892        U16_NEXT(overflow, i, length, c);
1893
1894        /* move the remaining overflow contents up to the beginning */
1895        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1896            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1897                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1898        }
1899
1900        if(!U16_IS_LEAD(c) || i<length) {
1901            return c;
1902        }
1903        /*
1904         * Continue if the overflow buffer contained only a lead surrogate,
1905         * in case the converter outputs single surrogates from complete
1906         * input sequences.
1907         */
1908    }
1909
1910    /*
1911     * flush==TRUE is implied for ucnv_getNextUChar()
1912     *
1913     * do not simply return even if s==sourceLimit because the converter may
1914     * not have seen flush==TRUE before
1915     */
1916
1917    /* prepare the converter arguments */
1918    args.converter=cnv;
1919    args.flush=TRUE;
1920    args.offsets=NULL;
1921    args.source=s;
1922    args.sourceLimit=sourceLimit;
1923    args.target=buffer;
1924    args.targetLimit=buffer+1;
1925    args.size=sizeof(args);
1926
1927    if(c<0) {
1928        /*
1929         * call the native getNextUChar() implementation if we are
1930         * at a character boundary (toULength==0)
1931         *
1932         * unlike with _toUnicode(), getNextUChar() implementations must set
1933         * U_TRUNCATED_CHAR_FOUND for truncated input,
1934         * in addition to setting toULength/toUBytes[]
1935         */
1936        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1937            c=cnv->sharedData->impl->getNextUChar(&args, err);
1938            *source=s=args.source;
1939            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1940                /* reset the converter without calling the callback function */
1941                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1942                return 0xffff; /* no output */
1943            } else if(U_SUCCESS(*err) && c>=0) {
1944                return c;
1945            /*
1946             * else fall through to use _toUnicode() because
1947             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1948             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
1949             */
1950            }
1951        }
1952
1953        /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1954        _toUnicodeWithCallback(&args, err);
1955
1956        if(*err==U_BUFFER_OVERFLOW_ERROR) {
1957            *err=U_ZERO_ERROR;
1958        }
1959
1960        i=0;
1961        length=(int32_t)(args.target-buffer);
1962    } else {
1963        /* write the lead surrogate from the overflow buffer */
1964        buffer[0]=(UChar)c;
1965        args.target=buffer+1;
1966        i=0;
1967        length=1;
1968    }
1969
1970    /* buffer contents starts at i and ends before length */
1971
1972    if(U_FAILURE(*err)) {
1973        c=0xffff; /* no output */
1974    } else if(length==0) {
1975        /* no input or only state changes */
1976        *err=U_INDEX_OUTOFBOUNDS_ERROR;
1977        /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1978        c=0xffff; /* no output */
1979    } else {
1980        c=buffer[0];
1981        i=1;
1982        if(!U16_IS_LEAD(c)) {
1983            /* consume c=buffer[0], done */
1984        } else {
1985            /* got a lead surrogate, see if a trail surrogate follows */
1986            UChar c2;
1987
1988            if(cnv->UCharErrorBufferLength>0) {
1989                /* got overflow output from the conversion */
1990                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1991                    /* got a trail surrogate, too */
1992                    c=U16_GET_SUPPLEMENTARY(c, c2);
1993
1994                    /* move the remaining overflow contents up to the beginning */
1995                    if((--cnv->UCharErrorBufferLength)>0) {
1996                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1997                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1998                    }
1999                } else {
2000                    /* c is an unpaired lead surrogate, just return it */
2001                }
2002            } else if(args.source<sourceLimit) {
2003                /* convert once more, to buffer[1] */
2004                args.targetLimit=buffer+2;
2005                _toUnicodeWithCallback(&args, err);
2006                if(*err==U_BUFFER_OVERFLOW_ERROR) {
2007                    *err=U_ZERO_ERROR;
2008                }
2009
2010                length=(int32_t)(args.target-buffer);
2011                if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2012                    /* got a trail surrogate, too */
2013                    c=U16_GET_SUPPLEMENTARY(c, c2);
2014                    i=2;
2015                }
2016            }
2017        }
2018    }
2019
2020    /*
2021     * move leftover output from buffer[i..length[
2022     * into the beginning of the overflow buffer
2023     */
2024    if(i<length) {
2025        /* move further overflow back */
2026        int32_t delta=length-i;
2027        if((length=cnv->UCharErrorBufferLength)>0) {
2028            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2029                         length*U_SIZEOF_UCHAR);
2030        }
2031        cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2032
2033        cnv->UCharErrorBuffer[0]=buffer[i++];
2034        if(delta>1) {
2035            cnv->UCharErrorBuffer[1]=buffer[i];
2036        }
2037    }
2038
2039    *source=args.source;
2040    return c;
2041}
2042
/* ucnv_convert() and siblings ---------------------------------------------- */
2044
2045U_CAPI void U_EXPORT2
2046ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
2047               char **target, const char *targetLimit,
2048               const char **source, const char *sourceLimit,
2049               UChar *pivotStart, UChar **pivotSource,
2050               UChar **pivotTarget, const UChar *pivotLimit,
2051               UBool reset, UBool flush,
2052               UErrorCode *pErrorCode) {
2053    UChar pivotBuffer[CHUNK_SIZE];
2054    const UChar *myPivotSource;
2055    UChar *myPivotTarget;
2056    const char *s;
2057    char *t;
2058
2059    UConverterToUnicodeArgs toUArgs;
2060    UConverterFromUnicodeArgs fromUArgs;
2061    UConverterConvert convert;
2062
2063    /* error checking */
2064    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2065        return;
2066    }
2067
2068    if( targetCnv==NULL || sourceCnv==NULL ||
2069        source==NULL || *source==NULL ||
2070        target==NULL || *target==NULL || targetLimit==NULL
2071    ) {
2072        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2073        return;
2074    }
2075
2076    s=*source;
2077    t=*target;
2078    if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
2079        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2080        return;
2081    }
2082
2083    /*
2084     * Make sure that the buffer sizes do not exceed the number range for
2085     * int32_t. See ucnv_toUnicode() for a more detailed comment.
2086     */
2087    if(
2088        (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
2089        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
2090    ) {
2091        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2092        return;
2093    }
2094
2095    if(pivotStart==NULL) {
2096        if(!flush) {
2097            /* streaming conversion requires an explicit pivot buffer */
2098            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2099            return;
2100        }
2101
2102        /* use the stack pivot buffer */
2103        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
2104        pivotSource=(UChar **)&myPivotSource;
2105        pivotTarget=&myPivotTarget;
2106        pivotLimit=pivotBuffer+CHUNK_SIZE;
2107    } else if(  pivotStart>=pivotLimit ||
2108                pivotSource==NULL || *pivotSource==NULL ||
2109                pivotTarget==NULL || *pivotTarget==NULL ||
2110                pivotLimit==NULL
2111    ) {
2112        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2113        return;
2114    }
2115
2116    if(sourceLimit==NULL) {
2117        /* get limit of single-byte-NUL-terminated source string */
2118        sourceLimit=uprv_strchr(*source, 0);
2119    }
2120
2121    if(reset) {
2122        ucnv_resetToUnicode(sourceCnv);
2123        ucnv_resetFromUnicode(targetCnv);
2124        *pivotSource=*pivotTarget=pivotStart;
2125    } else if(targetCnv->charErrorBufferLength>0) {
2126        /* output the targetCnv overflow buffer */
2127        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
2128            /* U_BUFFER_OVERFLOW_ERROR */
2129            return;
2130        }
2131        /* *target has moved, therefore stop using t */
2132
2133        if( !flush &&
2134            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
2135            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
2136        ) {
2137            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2138            return;
2139        }
2140    }
2141
2142    /* Is direct-UTF-8 conversion available? */
2143    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2144        targetCnv->sharedData->impl->fromUTF8!=NULL
2145    ) {
2146        convert=targetCnv->sharedData->impl->fromUTF8;
2147    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2148               sourceCnv->sharedData->impl->toUTF8!=NULL
2149    ) {
2150        convert=sourceCnv->sharedData->impl->toUTF8;
2151    } else {
2152        convert=NULL;
2153    }
2154
2155    /*
2156     * If direct-UTF-8 conversion is available, then we use a smaller
2157     * pivot buffer for error handling and partial matches
2158     * so that we quickly return to direct conversion.
2159     *
2160     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2161     *
2162     * We could reduce the pivot buffer size further, at the cost of
2163     * buffer overflows from callbacks.
2164     * The pivot buffer should not be smaller than the maximum number of
2165     * fromUnicode extension table input UChars
2166     * (for m:n conversion, see
2167     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2168     * or 2 for surrogate pairs.
2169     *
2170     * Too small a buffer can cause thrashing between pivoting and direct
2171     * conversion, with function call overhead outweighing the benefits
2172     * of direct conversion.
2173     */
2174    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
2175        pivotLimit=pivotStart+32;
2176    }
2177
2178    /* prepare the converter arguments */
2179    fromUArgs.converter=targetCnv;
2180    fromUArgs.flush=FALSE;
2181    fromUArgs.offsets=NULL;
2182    fromUArgs.target=*target;
2183    fromUArgs.targetLimit=targetLimit;
2184    fromUArgs.size=sizeof(fromUArgs);
2185
2186    toUArgs.converter=sourceCnv;
2187    toUArgs.flush=flush;
2188    toUArgs.offsets=NULL;
2189    toUArgs.source=s;
2190    toUArgs.sourceLimit=sourceLimit;
2191    toUArgs.targetLimit=pivotLimit;
2192    toUArgs.size=sizeof(toUArgs);
2193
2194    /*
2195     * TODO: Consider separating this function into two functions,
2196     * extracting exactly the conversion loop,
2197     * for readability and to reduce the set of visible variables.
2198     *
2199     * Otherwise stop using s and t from here on.
2200     */
2201    s=t=NULL;
2202
2203    /*
2204     * conversion loop
2205     *
2206     * The sequence of steps in the loop may appear backward,
2207     * but the principle is simple:
2208     * In the chain of
2209     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
2210     * empty out later buffers before refilling them from earlier ones.
2211     *
2212     * The targetCnv overflow buffer is flushed out only once before the loop.
2213     */
2214    for(;;) {
2215        /*
2216         * if(pivot not empty or error or replay or flush fromUnicode) {
2217         *   fromUnicode(pivot -> target);
2218         * }
2219         *
2220         * For pivoting conversion; and for direct conversion for
2221         * error callback handling and flushing the replay buffer.
2222         */
2223        if( *pivotSource<*pivotTarget ||
2224            U_FAILURE(*pErrorCode) ||
2225            targetCnv->preFromULength<0 ||
2226            fromUArgs.flush
2227        ) {
2228            fromUArgs.source=*pivotSource;
2229            fromUArgs.sourceLimit=*pivotTarget;
2230            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
2231            if(U_FAILURE(*pErrorCode)) {
2232                /* target overflow, or conversion error */
2233                *pivotSource=(UChar *)fromUArgs.source;
2234                break;
2235            }
2236
2237            /*
2238             * _fromUnicodeWithCallback() must have consumed the pivot contents
2239             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2240             */
2241        }
2242
2243        /* The pivot buffer is empty; reset it so we start at pivotStart. */
2244        *pivotSource=*pivotTarget=pivotStart;
2245
2246        /*
2247         * if(sourceCnv overflow buffer not empty) {
2248         *     move(sourceCnv overflow buffer -> pivot);
2249         *     continue;
2250         * }
2251         */
2252        /* output the sourceCnv overflow buffer */
2253        if(sourceCnv->UCharErrorBufferLength>0) {
2254            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
2255                /* U_BUFFER_OVERFLOW_ERROR */
2256                *pErrorCode=U_ZERO_ERROR;
2257            }
2258            continue;
2259        }
2260
2261        /*
2262         * check for end of input and break if done
2263         *
2264         * Checking both flush and fromUArgs.flush ensures that the converters
2265         * have been called with the flush flag set if the ucnv_convertEx()
2266         * caller set it.
2267         */
2268        if( toUArgs.source==sourceLimit &&
2269            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
2270            (!flush || fromUArgs.flush)
2271        ) {
2272            /* done successfully */
2273            break;
2274        }
2275
2276        /*
2277         * use direct conversion if available
2278         * but not if continuing a partial match
2279         * or flushing the toUnicode replay buffer
2280         */
2281        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
2282            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2283                /* remove a warning that may be set by this function */
2284                *pErrorCode=U_ZERO_ERROR;
2285            }
2286            convert(&fromUArgs, &toUArgs, pErrorCode);
2287            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2288                break;
2289            } else if(U_FAILURE(*pErrorCode)) {
2290                if(sourceCnv->toULength>0) {
2291                    /*
2292                     * Fall through to calling _toUnicodeWithCallback()
2293                     * for callback handling.
2294                     *
2295                     * The pivot buffer will be reset with
2296                     *   *pivotSource=*pivotTarget=pivotStart;
2297                     * which indicates a toUnicode error to the caller
2298                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
2299                     */
2300                } else {
2301                    /*
2302                     * Indicate a fromUnicode error to the caller
2303                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
2304                     */
2305                    *pivotSource=*pivotTarget=pivotStart+1;
2306                    /*
2307                     * Loop around to calling _fromUnicodeWithCallbacks()
2308                     * for callback handling.
2309                     */
2310                    continue;
2311                }
2312            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2313                /*
2314                 * No error, but the implementation requested to temporarily
2315                 * fall back to pivoting.
2316                 */
2317                *pErrorCode=U_ZERO_ERROR;
2318            /*
2319             * The following else branches are almost identical to the end-of-input
2320             * handling in _toUnicodeWithCallback().
2321             * Avoid calling it just for the end of input.
2322             */
2323            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
2324                /*
2325                 * the entire input stream is consumed
2326                 * and there is a partial, truncated input sequence left
2327                 */
2328
2329                /* inject an error and continue with callback handling */
2330                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
2331            } else {
2332                /* input consumed */
2333                if(flush) {
2334                    /* reset the converters without calling the callback functions */
2335                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
2336                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
2337                }
2338
2339                /* done successfully */
2340                break;
2341            }
2342        }
2343
2344        /*
2345         * toUnicode(source -> pivot);
2346         *
2347         * For pivoting conversion; and for direct conversion for
2348         * error callback handling, continuing partial matches
2349         * and flushing the replay buffer.
2350         *
2351         * The pivot buffer is empty and reset.
2352         */
2353        toUArgs.target=pivotStart; /* ==*pivotTarget */
2354        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2355        _toUnicodeWithCallback(&toUArgs, pErrorCode);
2356        *pivotTarget=toUArgs.target;
2357        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2358            /* pivot overflow: continue with the conversion loop */
2359            *pErrorCode=U_ZERO_ERROR;
2360        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
2361            /* conversion error, or there was nothing left to convert */
2362            break;
2363        }
2364        /*
2365         * else:
2366         * _toUnicodeWithCallback() wrote into the pivot buffer,
2367         * continue with fromUnicode conversion.
2368         *
2369         * Set the fromUnicode flush flag if we flush and if toUnicode has
2370         * processed the end of the input.
2371         */
2372        if( flush && toUArgs.source==sourceLimit &&
2373            sourceCnv->preToULength>=0 &&
2374            sourceCnv->UCharErrorBufferLength==0
2375        ) {
2376            fromUArgs.flush=TRUE;
2377        }
2378    }
2379
2380    /*
2381     * The conversion loop is exited when one of the following is true:
2382     * - the entire source text has been converted successfully to the target buffer
2383     * - a target buffer overflow occurred
2384     * - a conversion error occurred
2385     */
2386
2387    *source=toUArgs.source;
2388    *target=fromUArgs.target;
2389
2390    /* terminate the target buffer if possible */
2391    if(flush && U_SUCCESS(*pErrorCode)) {
2392        if(*target!=targetLimit) {
2393            **target=0;
2394            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2395                *pErrorCode=U_ZERO_ERROR;
2396            }
2397        } else {
2398            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2399        }
2400    }
2401}
2402
2403/* internal implementation of ucnv_convert() etc. with preflighting */
2404static int32_t
2405ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2406                     char *target, int32_t targetCapacity,
2407                     const char *source, int32_t sourceLength,
2408                     UErrorCode *pErrorCode) {
2409    UChar pivotBuffer[CHUNK_SIZE];
2410    UChar *pivot, *pivot2;
2411
2412    char *myTarget;
2413    const char *sourceLimit;
2414    const char *targetLimit;
2415    int32_t targetLength=0;
2416
2417    /* set up */
2418    if(sourceLength<0) {
2419        sourceLimit=uprv_strchr(source, 0);
2420    } else {
2421        sourceLimit=source+sourceLength;
2422    }
2423
2424    /* if there is no input data, we're done */
2425    if(source==sourceLimit) {
2426        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2427    }
2428
2429    pivot=pivot2=pivotBuffer;
2430    myTarget=target;
2431    targetLength=0;
2432
2433    if(targetCapacity>0) {
2434        /* perform real conversion */
2435        targetLimit=target+targetCapacity;
2436        ucnv_convertEx(outConverter, inConverter,
2437                       &myTarget, targetLimit,
2438                       &source, sourceLimit,
2439                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2440                       FALSE,
2441                       TRUE,
2442                       pErrorCode);
2443        targetLength=(int32_t)(myTarget-target);
2444    }
2445
2446    /*
2447     * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2448     * to it but continue the conversion in order to store in targetCapacity
2449     * the number of bytes that was required.
2450     */
2451    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2452    {
2453        char targetBuffer[CHUNK_SIZE];
2454
2455        targetLimit=targetBuffer+CHUNK_SIZE;
2456        do {
2457            *pErrorCode=U_ZERO_ERROR;
2458            myTarget=targetBuffer;
2459            ucnv_convertEx(outConverter, inConverter,
2460                           &myTarget, targetLimit,
2461                           &source, sourceLimit,
2462                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2463                           FALSE,
2464                           TRUE,
2465                           pErrorCode);
2466            targetLength+=(int32_t)(myTarget-targetBuffer);
2467        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2468
2469        /* done with preflighting, set warnings and errors as appropriate */
2470        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2471    }
2472
2473    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2474    return targetLength;
2475}
2476
2477U_CAPI int32_t U_EXPORT2
2478ucnv_convert(const char *toConverterName, const char *fromConverterName,
2479             char *target, int32_t targetCapacity,
2480             const char *source, int32_t sourceLength,
2481             UErrorCode *pErrorCode) {
2482    UConverter in, out; /* stack-allocated */
2483    UConverter *inConverter, *outConverter;
2484    int32_t targetLength;
2485
2486    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2487        return 0;
2488    }
2489
2490    if( source==NULL || sourceLength<-1 ||
2491        targetCapacity<0 || (targetCapacity>0 && target==NULL)
2492    ) {
2493        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2494        return 0;
2495    }
2496
2497    /* if there is no input data, we're done */
2498    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2499        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2500    }
2501
2502    /* create the converters */
2503    inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2504    if(U_FAILURE(*pErrorCode)) {
2505        return 0;
2506    }
2507
2508    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2509    if(U_FAILURE(*pErrorCode)) {
2510        ucnv_close(inConverter);
2511        return 0;
2512    }
2513
2514    targetLength=ucnv_internalConvert(outConverter, inConverter,
2515                                      target, targetCapacity,
2516                                      source, sourceLength,
2517                                      pErrorCode);
2518
2519    ucnv_close(inConverter);
2520    ucnv_close(outConverter);
2521
2522    return targetLength;
2523}
2524
2525/* @internal */
2526static int32_t
2527ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2528                        UConverterType algorithmicType,
2529                        UConverter *cnv,
2530                        char *target, int32_t targetCapacity,
2531                        const char *source, int32_t sourceLength,
2532                        UErrorCode *pErrorCode) {
2533    UConverter algoConverterStatic; /* stack-allocated */
2534    UConverter *algoConverter, *to, *from;
2535    int32_t targetLength;
2536
2537    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2538        return 0;
2539    }
2540
2541    if( cnv==NULL || source==NULL || sourceLength<-1 ||
2542        targetCapacity<0 || (targetCapacity>0 && target==NULL)
2543    ) {
2544        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2545        return 0;
2546    }
2547
2548    /* if there is no input data, we're done */
2549    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2550        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2551    }
2552
2553    /* create the algorithmic converter */
2554    algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2555                                                  "", 0, pErrorCode);
2556    if(U_FAILURE(*pErrorCode)) {
2557        return 0;
2558    }
2559
2560    /* reset the other converter */
2561    if(convertToAlgorithmic) {
2562        /* cnv->Unicode->algo */
2563        ucnv_resetToUnicode(cnv);
2564        to=algoConverter;
2565        from=cnv;
2566    } else {
2567        /* algo->Unicode->cnv */
2568        ucnv_resetFromUnicode(cnv);
2569        from=algoConverter;
2570        to=cnv;
2571    }
2572
2573    targetLength=ucnv_internalConvert(to, from,
2574                                      target, targetCapacity,
2575                                      source, sourceLength,
2576                                      pErrorCode);
2577
2578    ucnv_close(algoConverter);
2579
2580    return targetLength;
2581}
2582
2583U_CAPI int32_t U_EXPORT2
2584ucnv_toAlgorithmic(UConverterType algorithmicType,
2585                   UConverter *cnv,
2586                   char *target, int32_t targetCapacity,
2587                   const char *source, int32_t sourceLength,
2588                   UErrorCode *pErrorCode) {
2589    return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2590                                   target, targetCapacity,
2591                                   source, sourceLength,
2592                                   pErrorCode);
2593}
2594
2595U_CAPI int32_t U_EXPORT2
2596ucnv_fromAlgorithmic(UConverter *cnv,
2597                     UConverterType algorithmicType,
2598                     char *target, int32_t targetCapacity,
2599                     const char *source, int32_t sourceLength,
2600                     UErrorCode *pErrorCode) {
2601    return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2602                                   target, targetCapacity,
2603                                   source, sourceLength,
2604                                   pErrorCode);
2605}
2606
2607U_CAPI UConverterType  U_EXPORT2
2608ucnv_getType(const UConverter* converter)
2609{
2610    int8_t type = converter->sharedData->staticData->conversionType;
2611#if !UCONFIG_NO_LEGACY_CONVERSION
2612    if(type == UCNV_MBCS) {
2613        return ucnv_MBCSGetType(converter);
2614    }
2615#endif
2616    return (UConverterType)type;
2617}
2618
2619U_CAPI void  U_EXPORT2
2620ucnv_getStarters(const UConverter* converter,
2621                 UBool starters[256],
2622                 UErrorCode* err)
2623{
2624    if (err == NULL || U_FAILURE(*err)) {
2625        return;
2626    }
2627
2628    if(converter->sharedData->impl->getStarters != NULL) {
2629        converter->sharedData->impl->getStarters(converter, starters, err);
2630    } else {
2631        *err = U_ILLEGAL_ARGUMENT_ERROR;
2632    }
2633}
2634
2635static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2636{
2637    UErrorCode errorCode;
2638    const char *name;
2639    int32_t i;
2640
2641    if(cnv==NULL) {
2642        return NULL;
2643    }
2644
2645    errorCode=U_ZERO_ERROR;
2646    name=ucnv_getName(cnv, &errorCode);
2647    if(U_FAILURE(errorCode)) {
2648        return NULL;
2649    }
2650
2651    for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2652    {
2653        if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2654        {
2655            return ambiguousConverters+i;
2656        }
2657    }
2658
2659    return NULL;
2660}
2661
2662U_CAPI void  U_EXPORT2
2663ucnv_fixFileSeparator(const UConverter *cnv,
2664                      UChar* source,
2665                      int32_t sourceLength) {
2666    const UAmbiguousConverter *a;
2667    int32_t i;
2668    UChar variant5c;
2669
2670    if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2671    {
2672        return;
2673    }
2674
2675    variant5c=a->variant5c;
2676    for(i=0; i<sourceLength; ++i) {
2677        if(source[i]==variant5c) {
2678            source[i]=0x5c;
2679        }
2680    }
2681}
2682
2683U_CAPI UBool  U_EXPORT2
2684ucnv_isAmbiguous(const UConverter *cnv) {
2685    return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2686}
2687
2688U_CAPI void  U_EXPORT2
2689ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2690{
2691    cnv->useFallback = usesFallback;
2692}
2693
2694U_CAPI UBool  U_EXPORT2
2695ucnv_usesFallback(const UConverter *cnv)
2696{
2697    return cnv->useFallback;
2698}
2699
2700U_CAPI void  U_EXPORT2
2701ucnv_getInvalidChars (const UConverter * converter,
2702                      char *errBytes,
2703                      int8_t * len,
2704                      UErrorCode * err)
2705{
2706    if (err == NULL || U_FAILURE(*err))
2707    {
2708        return;
2709    }
2710    if (len == NULL || errBytes == NULL || converter == NULL)
2711    {
2712        *err = U_ILLEGAL_ARGUMENT_ERROR;
2713        return;
2714    }
2715    if (*len < converter->invalidCharLength)
2716    {
2717        *err = U_INDEX_OUTOFBOUNDS_ERROR;
2718        return;
2719    }
2720    if ((*len = converter->invalidCharLength) > 0)
2721    {
2722        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2723    }
2724}
2725
2726U_CAPI void  U_EXPORT2
2727ucnv_getInvalidUChars (const UConverter * converter,
2728                       UChar *errChars,
2729                       int8_t * len,
2730                       UErrorCode * err)
2731{
2732    if (err == NULL || U_FAILURE(*err))
2733    {
2734        return;
2735    }
2736    if (len == NULL || errChars == NULL || converter == NULL)
2737    {
2738        *err = U_ILLEGAL_ARGUMENT_ERROR;
2739        return;
2740    }
2741    if (*len < converter->invalidUCharLength)
2742    {
2743        *err = U_INDEX_OUTOFBOUNDS_ERROR;
2744        return;
2745    }
2746    if ((*len = converter->invalidUCharLength) > 0)
2747    {
2748        uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2749    }
2750}
2751
2752#define SIG_MAX_LEN 5
2753
2754U_CAPI const char* U_EXPORT2
2755ucnv_detectUnicodeSignature( const char* source,
2756                             int32_t sourceLength,
2757                             int32_t* signatureLength,
2758                             UErrorCode* pErrorCode) {
2759    int32_t dummy;
2760
2761    /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2762     * bytes we don't misdetect something
2763     */
2764    char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2765    int i = 0;
2766
2767    if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2768        return NULL;
2769    }
2770
2771    if(source == NULL || sourceLength < -1){
2772        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2773        return NULL;
2774    }
2775
2776    if(signatureLength == NULL) {
2777        signatureLength = &dummy;
2778    }
2779
2780    if(sourceLength==-1){
2781        sourceLength=(int32_t)uprv_strlen(source);
2782    }
2783
2784
2785    while(i<sourceLength&& i<SIG_MAX_LEN){
2786        start[i]=source[i];
2787        i++;
2788    }
2789
2790    if(start[0] == '\xFE' && start[1] == '\xFF') {
2791        *signatureLength=2;
2792        return  "UTF-16BE";
2793    } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2794        if(start[2] == '\x00' && start[3] =='\x00') {
2795            *signatureLength=4;
2796            return "UTF-32LE";
2797        } else {
2798            *signatureLength=2;
2799            return  "UTF-16LE";
2800        }
2801    } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2802        *signatureLength=3;
2803        return  "UTF-8";
2804    } else if(start[0] == '\x00' && start[1] == '\x00' &&
2805              start[2] == '\xFE' && start[3]=='\xFF') {
2806        *signatureLength=4;
2807        return  "UTF-32BE";
2808    } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2809        *signatureLength=3;
2810        return "SCSU";
2811    } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2812        *signatureLength=3;
2813        return "BOCU-1";
2814    } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2815        /*
2816         * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
2817         * depending on the second UTF-16 code unit.
2818         * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2819         * if it occurs.
2820         *
2821         * So far we have +/v
2822         */
2823        if(start[3] == '\x38' && start[4] == '\x2D') {
2824            /* 5 bytes +/v8- */
2825            *signatureLength=5;
2826            return "UTF-7";
2827        } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2828            /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
2829            *signatureLength=4;
2830            return "UTF-7";
2831        }
2832    }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2833        *signatureLength=4;
2834        return "UTF-EBCDIC";
2835    }
2836
2837
2838    /* no known Unicode signature byte sequence recognized */
2839    *signatureLength=0;
2840    return NULL;
2841}
2842
2843U_CAPI int32_t U_EXPORT2
2844ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2845{
2846    if(status == NULL || U_FAILURE(*status)){
2847        return -1;
2848    }
2849    if(cnv == NULL){
2850        *status = U_ILLEGAL_ARGUMENT_ERROR;
2851        return -1;
2852    }
2853
2854    if(cnv->preFromUFirstCP >= 0){
2855        return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2856    }else if(cnv->preFromULength < 0){
2857        return -cnv->preFromULength ;
2858    }else if(cnv->fromUChar32 > 0){
2859        return 1;
2860    }
2861    return 0;
2862
2863}
2864
2865U_CAPI int32_t U_EXPORT2
2866ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2867
2868    if(status == NULL || U_FAILURE(*status)){
2869        return -1;
2870    }
2871    if(cnv == NULL){
2872        *status = U_ILLEGAL_ARGUMENT_ERROR;
2873        return -1;
2874    }
2875
2876    if(cnv->preToULength > 0){
2877        return cnv->preToULength ;
2878    }else if(cnv->preToULength < 0){
2879        return -cnv->preToULength;
2880    }else if(cnv->toULength > 0){
2881        return cnv->toULength;
2882    }
2883    return 0;
2884}
2885
2886U_CAPI UBool U_EXPORT2
2887ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
2888    if (U_FAILURE(*status)) {
2889        return FALSE;
2890    }
2891
2892    if (cnv == NULL) {
2893        *status = U_ILLEGAL_ARGUMENT_ERROR;
2894        return FALSE;
2895    }
2896
2897    switch (ucnv_getType(cnv)) {
2898        case UCNV_SBCS:
2899        case UCNV_DBCS:
2900        case UCNV_UTF32_BigEndian:
2901        case UCNV_UTF32_LittleEndian:
2902        case UCNV_UTF32:
2903        case UCNV_US_ASCII:
2904            return TRUE;
2905        default:
2906            return FALSE;
2907    }
2908}
2909#endif
2910
2911/*
2912 * Hey, Emacs, please set the following:
2913 *
2914 * Local Variables:
2915 * indent-tabs-mode: nil
2916 * End:
2917 *
2918 */
2919