1/* 2 * util.c - Miscellaneous support 3 * 4 * Copyright (C) 1997,1999 Martin von L�wis 5 * Copyright (C) 1997 R�gis Duchesne 6 * Copyright (C) 2001 Anton Altaparmakov (AIA) 7 * 8 * The utf8 routines are copied from Python wstrop module. 9 */ 10 11#include "ntfstypes.h" 12#include "struct.h" 13#include "util.h" 14#include <linux/string.h> 15#include <linux/errno.h> 16#include <asm/div64.h> /* For do_div(). */ 17#include "support.h" 18 19/* 20 * Converts a single wide character to a sequence of utf8 bytes. 21 * The character is represented in host byte order. 22 * Returns the number of bytes, or 0 on error. 23 */ 24static int to_utf8(ntfs_u16 c, unsigned char *buf) 25{ 26 if (c == 0) 27 return 0; /* No support for embedded 0 runes. */ 28 if (c < 0x80) { 29 if (buf) 30 buf[0] = (unsigned char)c; 31 return 1; 32 } 33 if (c < 0x800) { 34 if (buf) { 35 buf[0] = 0xc0 | (c >> 6); 36 buf[1] = 0x80 | (c & 0x3f); 37 } 38 return 2; 39 } 40 /* c < 0x10000 */ 41 if (buf) { 42 buf[0] = 0xe0 | (c >> 12); 43 buf[1] = 0x80 | ((c >> 6) & 0x3f); 44 buf[2] = 0x80 | (c & 0x3f); 45 } 46 return 3; 47} 48 49/* 50 * Decodes a sequence of utf8 bytes into a single wide character. 51 * The character is returned in host byte order. 52 * Returns the number of bytes consumed, or 0 on error. 53 */ 54static int from_utf8(const unsigned char *str, ntfs_u16 *c) 55{ 56 int l = 0, i; 57 58 if (*str < 0x80) { 59 *c = *str; 60 return 1; 61 } 62 if (*str < 0xc0) /* Lead byte must not be 10xxxxxx. */ 63 return 0; /* Is c0 a possible lead byte? */ 64 if (*str < 0xe0) { /* 110xxxxx */ 65 *c = *str & 0x1f; 66 l = 2; 67 } else if (*str < 0xf0) { /* 1110xxxx */ 68 *c = *str & 0xf; 69 l = 3; 70 } else if (*str < 0xf8) { /* 11110xxx */ 71 *c = *str & 7; 72 l = 4; 73 } else /* We don't support characters above 0xFFFF in NTFS. */ 74 return 0; 75 for (i = 1; i < l; i++) { 76 /* All other bytes must be 10xxxxxx. */ 77 if ((str[i] & 0xc0) != 0x80) 78 return 0; 79 *c <<= 6; 80 *c |= str[i] & 0x3f; 81 } 82 return l; 83} 84 85/* 86 * Converts wide string to UTF-8. Expects two in- and two out-parameters. 87 * Returns 0 on success, or error code. 88 * The caller has to free the result string. 89 */ 90static int ntfs_dupuni2utf8(ntfs_u16 *in, int in_len, char **out, int *out_len) 91{ 92 int i, tmp; 93 int len8; 94 unsigned char *result; 95 96 ntfs_debug(DEBUG_NAME1, "converting l = %d\n", in_len); 97 /* Count the length of the resulting UTF-8. */ 98 for (i = len8 = 0; i < in_len; i++) { 99 tmp = to_utf8(NTFS_GETU16(in + i), 0); 100 if (!tmp) 101 /* Invalid character. */ 102 return -EILSEQ; 103 len8 += tmp; 104 } 105 *out = result = ntfs_malloc(len8 + 1); /* allow for zero-termination */ 106 if (!result) 107 return -ENOMEM; 108 result[len8] = '\0'; 109 *out_len = len8; 110 for (i = len8 = 0; i < in_len; i++) 111 len8 += to_utf8(NTFS_GETU16(in + i), result + len8); 112 ntfs_debug(DEBUG_NAME1, "result %p:%s\n", result, result); 113 return 0; 114} 115 116/* 117 * Converts an UTF-8 sequence to a wide string. Same conventions as the 118 * previous function. 119 */ 120static int ntfs_duputf82uni(unsigned char* in, int in_len, ntfs_u16** out, 121 int *out_len) 122{ 123 int i, tmp; 124 int len16; 125 ntfs_u16* result; 126 ntfs_u16 wtmp; 127 128 for (i = len16 = 0; i < in_len; i += tmp, len16++) { 129 tmp = from_utf8(in + i, &wtmp); 130 if (!tmp) 131 return -EILSEQ; 132 } 133 *out = result = ntfs_malloc(2 * (len16 + 1)); 134 if (!result) 135 return -ENOMEM; 136 result[len16] = 0; 137 *out_len = len16; 138 for (i = len16 = 0; i < in_len; i += tmp, len16++) { 139 tmp = from_utf8(in + i, &wtmp); 140 NTFS_PUTU16(result + len16, wtmp); 141 } 142 return 0; 143} 144 145/* Encodings dispatchers. */ 146int ntfs_encodeuni(ntfs_volume *vol, ntfs_u16 *in, int in_len, char **out, 147 int *out_len) 148{ 149 if (vol->nls_map) 150 return ntfs_dupuni2map(vol, in, in_len, out, out_len); 151 else 152 return ntfs_dupuni2utf8(in, in_len, out, out_len); 153} 154 155int ntfs_decodeuni(ntfs_volume *vol, char *in, int in_len, ntfs_u16 **out, 156 int *out_len) 157{ 158 if (vol->nls_map) 159 return ntfs_dupmap2uni(vol, in, in_len, out, out_len); 160 else 161 return ntfs_duputf82uni(in, in_len, out, out_len); 162} 163 164/* Same address space copies. */ 165void ntfs_put(ntfs_io *dest, void *src, ntfs_size_t n) 166{ 167 ntfs_memcpy(dest->param, src, n); 168 ((char*)dest->param) += n; 169} 170 171void ntfs_get(void* dest, ntfs_io *src, ntfs_size_t n) 172{ 173 ntfs_memcpy(dest, src->param, n); 174 ((char*)src->param) += n; 175} 176 177void *ntfs_calloc(int size) 178{ 179 void *result = ntfs_malloc(size); 180 if (result) 181 ntfs_bzero(result, size); 182 return result; 183} 184 185/* Copy len ascii characters from from to to. :) */ 186void ntfs_ascii2uni(short int *to, char *from, int len) 187{ 188 int i; 189 190 for (i = 0; i < len; i++) 191 NTFS_PUTU16(to + i, from[i]); 192 to[i] = 0; 193} 194 195/* strncmp for Unicode strings. */ 196int ntfs_uni_strncmp(short int* a, short int *b, int n) 197{ 198 int i; 199 200 for(i = 0; i < n; i++) 201 { 202 if (NTFS_GETU16(a + i) < NTFS_GETU16(b + i)) 203 return -1; 204 if (NTFS_GETU16(b + i) < NTFS_GETU16(a + i)) 205 return 1; 206 if (NTFS_GETU16(a + i) == 0) 207 break; 208 } 209 return 0; 210} 211 212/* strncmp between Unicode and ASCII strings. */ 213int ntfs_ua_strncmp(short int* a, char* b, int n) 214{ 215 int i; 216 217 for (i = 0; i < n; i++) { 218 if(NTFS_GETU16(a + i) < b[i]) 219 return -1; 220 if(b[i] < NTFS_GETU16(a + i)) 221 return 1; 222 if (b[i] == 0) 223 return 0; 224 } 225 return 0; 226} 227 228#define NTFS_TIME_OFFSET ((ntfs_time64_t)(369*365 + 89) * 24 * 3600 * 10000000) 229 230/* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units) 231 * into Unix UTC (based 1.1.1970, in seconds). */ 232ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc) 233{ 234 /* Subtract the NTFS time offset, then convert to 1s intervals. */ 235 ntfs_time64_t t = ntutc - NTFS_TIME_OFFSET; 236 do_div(t, 10000000); 237 return (ntfs_time_t)t; 238} 239 240/* Convert the Unix UTC into NT UTC. */ 241ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t) 242{ 243 /* Convert to 100ns intervals and then add the NTFS time offset. */ 244 return (ntfs_time64_t)t * 10000000 + NTFS_TIME_OFFSET; 245} 246 247#undef NTFS_TIME_OFFSET 248 249/* Fill index name. */ 250void ntfs_indexname(char *buf, int type) 251{ 252 char hex[] = "0123456789ABCDEF"; 253 int index; 254 *buf++ = '$'; 255 *buf++ = 'I'; 256 for (index = 24; index > 0; index -= 4) 257 if ((0xF << index) & type) 258 break; 259 while (index >= 0) { 260 *buf++ = hex[(type >> index) & 0xF]; 261 index -= 4; 262 } 263 *buf = '\0'; 264} 265 266