1/*- 2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 3 * at Electronni Visti IA, Kiev, Ukraine. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: src/lib/libc/string/strxfrm.c,v 1.17 2008/10/19 09:10:44 delphij Exp $"); 30 31#include "xlocale_private.h" 32 33#include <stdlib.h> 34#include <string.h> 35#include <wchar.h> 36#include <errno.h> 37#include "collate.h" 38 39/* 40 * In the non-POSIX case, we transform each character into a string of 41 * characters representing the character's priority. Since char is usually 42 * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 43 * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 44 * bits per byte. We choose 4 bytes per character as a good compromise 45 * between maximum coverage and minimum size. This gives 24 bits, or 16M 46 * priorities. So we choose COLLATE_MAX_PRIORITY to be (2^24 - 1). This 47 * this can be increased if more is needed. 48 */ 49 50#define XFRM_BYTES 4 51#define XFRM_OFFSET ('0') /* make all printable characters */ 52#define XFRM_SHIFT 6 53#define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 54 55static void 56xfrm(unsigned char *p, int pri) 57{ 58 59 p[3] = (pri & XFRM_MASK) + XFRM_OFFSET; 60 pri >>= XFRM_SHIFT; 61 p[2] = (pri & XFRM_MASK) + XFRM_OFFSET; 62 pri >>= XFRM_SHIFT; 63 p[1] = (pri & XFRM_MASK) + XFRM_OFFSET; 64 pri >>= XFRM_SHIFT; 65 p[0] = (pri & XFRM_MASK) + XFRM_OFFSET; 66} 67 68size_t 69strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, 70 locale_t loc) 71{ 72 size_t slen; 73 wchar_t *wcs, *xf[2]; 74 int sverrno; 75 76 if (!*src && dest) { 77 if (len > 0) 78 *dest = '\0'; 79 return 0; 80 } 81 82 NORMALIZE_LOCALE(loc); 83 if (loc->__collate_load_error || (wcs = __collate_mbstowcs(src, loc)) == NULL) 84 return strlcpy(dest, src, len); 85 86 __collate_xfrm(wcs, xf, loc); 87 88 slen = wcslen(xf[0]) * XFRM_BYTES; 89 if (xf[1]) 90 slen += (wcslen(xf[1]) + 1) * XFRM_BYTES; 91 if (len > 0) { 92 wchar_t *w = xf[0]; 93 int b = 0; 94 unsigned char buf[XFRM_BYTES]; 95 unsigned char *bp; 96 while (len > 1) { 97 if (!b) { 98 if (!*w) 99 break; 100 xfrm(bp = buf, *w++); 101 b = XFRM_BYTES; 102 } 103 *dest++ = *(char *)bp++; 104 b--; 105 len--; 106 } 107 if ((w = xf[1]) != NULL) { 108 xfrm(bp = buf, 0); 109 b = XFRM_BYTES; 110 while (len > 1) { 111 if (!b) 112 break; 113 *dest++ = *(char *)bp++; 114 b--; 115 len--; 116 } 117 b = 0; 118 while (len > 1) { 119 if (!b) { 120 if (!*w) 121 break; 122 xfrm(bp = buf, *w++); 123 b = XFRM_BYTES; 124 } 125 *dest++ = *(char *)bp++; 126 b--; 127 len--; 128 } 129 } 130 *dest = 0; 131 } 132 sverrno = errno; 133 free(wcs); 134 free(xf[0]); 135 free(xf[1]); 136 errno = sverrno; 137 138 return slen; 139} 140 141size_t 142strxfrm(char * __restrict dest, const char * __restrict src, size_t len) 143{ 144 return strxfrm_l(dest, src, len, __current_locale()); 145} 146