utf8.c (122467) | utf8.c (128004) |
---|---|
1/*- | 1/*- |
2 * Copyright (c) 2002, 2003 Tim J. Robbins | 2 * Copyright (c) 2002-2004 Tim J. Robbins |
3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright --- 8 unchanged lines hidden (view full) --- 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 | 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright --- 8 unchanged lines hidden (view full) --- 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 |
27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/lib/libc/locale/utf8.c 122467 2003-11-11 07:25:05Z tjr $"); | 27#include <sys/param.h> 28__FBSDID("$FreeBSD: head/lib/libc/locale/utf8.c 128004 2004-04-07 10:48:19Z tjr $"); |
29 30#include <errno.h> 31#include <runetype.h> | 29 30#include <errno.h> 31#include <runetype.h> |
32#include <stddef.h> 33#include <stdio.h> | |
34#include <stdlib.h> | 32#include <stdlib.h> |
33#include <string.h> |
|
35#include <wchar.h> 36 37extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, 38 size_t, mbstate_t * __restrict); | 34#include <wchar.h> 35 36extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, 37 size_t, mbstate_t * __restrict); |
38extern int (*__mbsinit)(const mbstate_t *); |
|
39extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); 40 | 39extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); 40 |
41size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, | 41size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, |
42 mbstate_t * __restrict); | 42 mbstate_t * __restrict); |
43size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); | 43int _UTF8_mbsinit(const mbstate_t *); 44size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); |
44 | 45 |
/* Conversion state that the UTF-8 routines in this file overlay on the caller's mbstate_t (they cast the mbstate_t pointer to _UTF8State *). */ 46typedef struct { 47 int count; /* number of octets currently stored in bytes[]; 0 is what _UTF8_mbsinit() treats as the initial state */ 48 u_char bytes[6]; /* buffered input octets; _UTF8_mbrtowc() appends new input at bytes[count] */ 49} _UTF8State; 50 |
|
/* Old revision: points the __mbrtowc and __wcrtomb hooks at the UTF-8 implementations, makes rl the current rune locale, sets __mb_cur_max to 6 and returns 0. */ 45int 46_UTF8_init(_RuneLocale *rl) 47{ 48 49 __mbrtowc = _UTF8_mbrtowc; 50 __wcrtomb = _UTF8_wcrtomb; | /* New revision: same as before, and additionally points the __mbsinit hook at _UTF8_mbsinit. */ 51int 52_UTF8_init(_RuneLocale *rl) 53{ 54 55 __mbrtowc = _UTF8_mbrtowc; 56 __wcrtomb = _UTF8_wcrtomb; |
57 __mbsinit = _UTF8_mbsinit; |
|
51 _CurrentRuneLocale = rl; 52 __mb_cur_max = 6; 53 54 return (0); 55} 56 | 58 _CurrentRuneLocale = rl; 59 __mb_cur_max = 6; 60 61 return (0); 62} 63 |
/* Installed as the __mbsinit hook by _UTF8_init(). Returns nonzero when ps is NULL or when the _UTF8State stored in *ps holds no buffered octets (count == 0), and zero otherwise. NOTE(review): the cast below drops the const qualifier of ps; casting to (const _UTF8State *) would read the same field without doing so. */ 64 int 65_UTF8_mbsinit(const mbstate_t *ps) 66{ 67 68 return (ps == NULL || ((_UTF8State *)ps)->count == 0); 69} 70 |
|
57size_t 58_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, | 71size_t 72_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, |
59 mbstate_t * __restrict ps __unused) | 73 mbstate_t * __restrict ps) |
60{ | 74{ |
61 int ch, i, len, mask; | 75 _UTF8State *us; 76 int ch, i, len, mask, ocount; |
62 wchar_t lbound, wch; | 77 wchar_t lbound, wch; |
78 size_t ncopy; |
|
63 | 79 |
64 if (s == NULL) 65 /* Reset to initial shift state (no-op) */ 66 return (0); | 80 us = (_UTF8State *)ps; 81 82 if (s == NULL) { 83 s = ""; 84 n = 1; 85 pwc = NULL; 86 } 87 88 ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count); 89 memcpy(us->bytes + us->count, s, ncopy); 90 ocount = us->count; 91 us->count += ncopy; 92 s = (char *)us->bytes; 93 n = us->count; 94 |
67 if (n == 0) 68 /* Incomplete multibyte sequence */ 69 return ((size_t)-2); 70 71 /* 72 * Determine the number of octets that make up this character from 73 * the first octet, and a mask that extracts the interesting bits of 74 * the first octet. --- 63 unchanged lines hidden (view full) --- 138 /* 139 * Malformed input; redundant encoding. 140 */ 141 errno = EILSEQ; 142 return ((size_t)-1); 143 } 144 if (pwc != NULL) 145 *pwc = wch; | 95 if (n == 0) 96 /* Incomplete multibyte sequence */ 97 return ((size_t)-2); 98 99 /* 100 * Determine the number of octets that make up this character from 101 * the first octet, and a mask that extracts the interesting bits of 102 * the first octet. --- 63 unchanged lines hidden (view full) --- 166 /* 167 * Malformed input; redundant encoding. 168 */ 169 errno = EILSEQ; 170 return ((size_t)-1); 171 } 172 if (pwc != NULL) 173 *pwc = wch; |
146 return (wch == L'\0' ? 0 : len); | 174 us->count = 0; 175 return (wch == L'\0' ? 0 : len - ocount); |
147} 148 149size_t 150_UTF8_wcrtomb(char * __restrict s, wchar_t wc, 151 mbstate_t * __restrict ps __unused) 152{ 153 unsigned char lead; 154 int i, len; --- 48 unchanged lines hidden --- | 176} 177 178size_t 179_UTF8_wcrtomb(char * __restrict s, wchar_t wc, 180 mbstate_t * __restrict ps __unused) 181{ 182 unsigned char lead; 183 int i, len; --- 48 unchanged lines hidden --- |