1/*
2 * ntfs_unistr.c - NTFS kernel Unicode string operations.
3 *
4 * Copyright (c) 2006-2008 Anton Altaparmakov.  All Rights Reserved.
5 * Portions Copyright (c) 2006-2008 Apple Inc.  All Rights Reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 *    this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 *    this list of conditions and the following disclaimer in the documentation
14 *    and/or other materials provided with the distribution.
15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
16 *    contributors may be used to endorse or promote products derived from this
17 *    software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
31 * full, this file may be redistributed and/or modified under the terms of the
32 * GNU General Public License (GPL) Version 2, in which case the provisions of
33 * that version of the GPL will apply to you instead of the license terms
34 * above.  You can obtain a copy of the GPL Version 2 at
35 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
36 */
37
38#include <sys/errno.h>
39#include <sys/types.h>
40#include <sys/utfconv.h>
41
42#include <string.h>
43
44#include <libkern/OSMalloc.h>
45
46#include "ntfs_debug.h"
47#include "ntfs_endian.h"
48#include "ntfs_types.h"
49#include "ntfs_unistr.h"
50#include "ntfs_volume.h"
51
52/*
53 * IMPORTANT
54 * =========
55 *
56 * All these routines assume that the Unicode characters are in little endian
57 * encoding inside the strings!!!
58 */
59
60/*
61 * This is used by the name collation functions to quickly determine what
62 * characters are (in)valid.
63 */
64static const u8 ntfs_legal_ansi_char_array[0x40] = {
65	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
66	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
67
68	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
69	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
70
71	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
72	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
73
74	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
75	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
76};
77
78/**
79 * ntfs_are_names_equal - compare two Unicode names for equality
80 * @s1:			name to compare to @s2
81 * @s1_len:		length in Unicode characters of @s1
82 * @s2:			name to compare to @s1
83 * @s2_len:		length in Unicode characters of @s2
84 * @case_sensitive:	if true compare names case sensitively
85 * @upcase:		upcase table (only if @ic == IGNORE_CASE)
86 * @upcase_len:		length in Unicode characters of @upcase (if present)
87 *
88 * Compare the names @s1 and @s2 and return true if the names are identical, or
89 * false if they are not identical.  If @case_sensitive is false, the @upcase
90 * table is used to perform a case insensitive comparison.
91 */
92BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
93		const ntfschar *s2, size_t s2_len, const BOOL case_sensitive,
94		const ntfschar *upcase, const u32 upcase_len)
95{
96	if (s1_len != s2_len)
97		return FALSE;
98	if (case_sensitive)
99		return !ntfs_ucsncmp(s1, s2, s1_len);
100	return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_len);
101}
102
103/**
104 * ntfs_collate_names - collate two Unicode names
105 * @name1:	first Unicode name to compare
106 * @name2:	second Unicode name to compare
107 * @err_val:	if @name1 contains an invalid character return this value
108 * @case_sensitive:	true if to collace case sensitive and false otherwise
109 * @upcase:		upcase table (ignored if @case_sensitive is false)
110 * @upcase_len:		upcase table length (ignored if !@case_sensitive)
111 *
112 * ntfs_collate_names collates two Unicode names and returns:
113 *
114 *  -1 if the first name collates before the second one,
115 *   0 if the names match,
116 *   1 if the second name collates before the first one, or
117 * @err_val if an invalid character is found in @name1 during the comparison.
118 *
119 * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
120 */
121int ntfs_collate_names(const ntfschar *name1, const u32 name1_len,
122		const ntfschar *name2, const u32 name2_len, const int err_val,
123		const BOOL case_sensitive, const ntfschar *upcase,
124		const u32 upcase_len)
125{
126	u32 cnt, min_len;
127	u16 c1, c2;
128
129	min_len = name1_len;
130	if (name1_len > name2_len)
131		min_len = name2_len;
132	for (cnt = 0; cnt < min_len; ++cnt) {
133		c1 = le16_to_cpu(*name1++);
134		c2 = le16_to_cpu(*name2++);
135		if (!case_sensitive) {
136			if (c1 < upcase_len)
137				c1 = le16_to_cpu(upcase[c1]);
138			if (c2 < upcase_len)
139				c2 = le16_to_cpu(upcase[c2]);
140		}
141		if (c1 < 64 && ntfs_legal_ansi_char_array[c1] & 8)
142			return err_val;
143		if (c1 < c2)
144			return -1;
145		if (c1 > c2)
146			return 1;
147	}
148	if (name1_len < name2_len)
149		return -1;
150	if (name1_len == name2_len)
151		return 0;
152	/*
153	 * name1_len > name2_len
154	 *
155	 * Sanity check the remainder of the string.
156	 */
157	do {
158		c1 = le16_to_cpu(*name1++);
159		if (c1 < 64 && ntfs_legal_ansi_char_array[c1] & 8)
160			return err_val;
161	} while (++cnt < name1_len);
162	return 1;
163}
164
165/**
166 * ntfs_ucsncmp - compare two little endian Unicode strings
167 * @s1:		first string
168 * @s2:		second string
169 * @n:		maximum unicode characters to compare
170 *
171 * Compare the first @n characters of the Unicode strings @s1 and @s2.  The
172 * strings are in little endian format and appropriate le16_to_cpu() conversion
173 * is performed on non-little endian machines.
174 *
175 * The function returns an integer less than, equal to, or greater than zero
176 * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
177 * to be less than, to match, or be greater than @s2.
178 */
179int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
180{
181	u16 c1, c2;
182	size_t i;
183
184	for (i = 0; i < n; ++i) {
185		c1 = le16_to_cpu(s1[i]);
186		c2 = le16_to_cpu(s2[i]);
187		if (c1 < c2)
188			return -1;
189		if (c1 > c2)
190			return 1;
191		if (!c1)
192			break;
193	}
194	return 0;
195}
196
197/**
198 * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
199 * @s1:			first string
200 * @s2:			second string
201 * @n:			maximum unicode characters to compare
202 * @upcase:		upcase table
203 * @upcase_size:	upcase table size in Unicode characters
204 *
205 * Compare the first @n characters of the Unicode strings @s1 and @s2,
206 * ignoring case.  The strings in little endian format and appropriate
207 * le16_to_cpu() conversion is performed on non-little endian machines.
208 *
209 * Each character is uppercased using the @upcase table before the comparison.
210 *
211 * The function returns an integer less than, equal to, or greater than zero
212 * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
213 * to be less than, to match, or be greater than @s2.
214 */
215int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
216		const ntfschar *upcase, const u32 upcase_size)
217{
218	size_t i;
219	u16 c1, c2;
220
221	for (i = 0; i < n; ++i) {
222		if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
223			c1 = le16_to_cpu(upcase[c1]);
224		if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
225			c2 = le16_to_cpu(upcase[c2]);
226		if (c1 < c2)
227			return -1;
228		if (c1 > c2)
229			return 1;
230		if (!c1)
231			break;
232	}
233	return 0;
234}
235
236void ntfs_upcase_name(ntfschar *name, u32 name_len, const ntfschar *upcase,
237		const u32 upcase_len)
238{
239	u32 i;
240	u16 u;
241
242	for (i = 0; i < name_len; i++)
243		if ((u = le16_to_cpu(name[i])) < upcase_len)
244			name[i] = upcase[u];
245}
246
247/**
248 * utf8_to_ntfs - convert UTF-8 OS X string to NTFS string
249 * @vol:	ntfs volume which we are working with
250 * @ins:	input UTF-8 string buffer
251 * @ins_size:	length of input string in bytes
252 * @outs:	on return contains the (allocated) output NTFS string buffer
253 * @outs_size:	pointer to length of output string buffer in bytes
254 *
255 * Convert the input, decomposed, NUL terminated, UTF-8 string @ins into the
256 * little endian, 2-byte, composed Unicode string format used on NTFS volumes.
257 * We convert any non-acceptable characters to the Win32 API to private Unicode
258 * characters on the assumption that the NTFS volume was written in this way,
259 * too and that the mapping used was the same as the Services For Macintosh one
260 * as described at http://support.microsoft.com/kb/q117258/.
261 *
262 * If *@outs is NULL, this function allocates the string and the caller is
263 * responsible for calling OSFree(*@outs, *@outs_size, ntfs_malloc_tag); when
264 * finished with it.
265 *
266 * If *@outs is not NULL, it is used as the destination buffer and the caller
267 * is responsible for having allocated a big enough buffer.  The minimum size
268 * is NTFS_MAX_NAME_LEN * sizeof(ntfschar) bytes.
269 *
270 * On success the function returns the number of Unicode characters written to
271 * the output string *@outs (>= 0), not counting the terminating Unicode NUL
272 * character.  If the output string buffer was allocated, *@outs is set to it
273 * and *@outs_size is set to the size of the allocated buffer.
274 *
275 * On error, a negative number corresponding to the error code is returned.  In
276 * that case the output string is not allocated.  The contents of *@outs and
277 * *@outs_size are then undefined.
278 *
279 * FIXME: Do we want UTF_SFM_CONVERSIONS, i.e. Services For Macintosh,
280 * unconditionally or flexibly by mount option?  And what about the SFU
281 * (Services For Unix) conversions?  Should we use those instead or perhaps
282 * allow a mount option to decide which conversions to use?
283 */
284signed utf8_to_ntfs(const ntfs_volume *vol, const u8 *ins,
285		const size_t ins_size, ntfschar **outs, size_t *outs_size)
286{
287	ntfschar *ntfs;
288	size_t ntfs_size, res_size;
289	errno_t err;
290
291	if (*outs) {
292		ntfs = *outs;
293		ntfs_size = *outs_size;
294	} else {
295		/* Allocate the maximum length NTFS string. */
296		ntfs_size = NTFS_MAX_NAME_LEN << NTFSCHAR_SIZE_SHIFT;
297		ntfs = OSMalloc(ntfs_size, ntfs_malloc_tag);
298		if (!ntfs) {
299			ntfs_error(vol->mp, "Failed to allocate memory for "
300					"output string.");
301			return -ENOMEM;
302		}
303	}
304	ntfs_debug("Input string size in bytes %lu (UTF-8).",
305			(unsigned long)ins_size);
306	/*
307	 * Convert the input string to NTFS formt (NUL terminated).
308	 * FIXME: The "0" is ignored.  If we decide to not specify "0" when
309	 * using utf8_encodelen()/utf8_encodestr() but some other value, we
310	 * would also need to use that other value instead of "0" here.  This
311	 * value would then be converted to the slash '/' character in the NTFS
312	 * string.
313	 */
314	err = utf8_decodestr(ins, ins_size, ntfs, &res_size, ntfs_size, 0,
315			UTF_PRECOMPOSED | UTF_LITTLE_ENDIAN |
316			UTF_SFM_CONVERSIONS);
317	if (err) {
318		ntfs_error(vol->mp, "Failed to convert decomposed, NUL "
319				"terminated, UTF-8 input string '%s' (size "
320				"%lu) to NTFS string (error %d, res_size %lu, "
321				"ntfs_size %lu).", ins, (unsigned long)ins_size,
322				(int)err, (unsigned long)res_size,
323				(unsigned long)ntfs_size);
324		goto err;
325	}
326	if (!*outs) {
327		*outs = ntfs;
328		*outs_size = ntfs_size;
329	}
330	res_size >>= NTFSCHAR_SIZE_SHIFT;
331	ntfs_debug("Converted string size in Unicode characters %lu.",
332			(unsigned long)res_size);
333	return res_size;
334err:
335	if (!*outs)
336		OSFree(ntfs, ntfs_size, ntfs_malloc_tag);
337	return -err;
338}
339
340/**
341 * ntfs_to_utf8 - convert NTFS string to UTF-8 OS X string
342 * @vol:	ntfs volume which we are working with
343 * @ins:	input NTFS string buffer
344 * @ins_size:	length of input string in bytes
345 * @outs:	on return contains the (allocated) output UTF-8 string buffer
346 * @outs_size:	pointer to length of output string buffer in bytes
347 *
348 * Convert the input, little endian, 2-byte, composed Unicode string @ins, of
349 * size @ins_size bytes into the decomposed, NUL terminated, UTF-8 string
350 * format used in the OS X kernel.  We assume that any non-acceptable
351 * characters to the Win32 API have been converted to private Unicode
352 * characters when they were written and the mapping used was the same as the
353 * Services For Macintosh one as described at
354 * http://support.microsoft.com/kb/q117258/.
355 *
356 * If *@outs is NULL, this function allocates the string and the caller is
357 * responsible for calling OSFree(*@outs, *@outs_size, ntfs_malloc_tag); when
358 * finished with it.
359 *
360 * On success the function returns the number of bytes written to the output
361 * string *@outs (>= 0), not counting the terminating NUL byte.  If the output
362 * string buffer was allocated, *@outs is set to it and *@outs_size is set to
363 * the size of the allocated buffer.
364 *
365 * On error, a negative number corresponding to the error code is returned.  In
366 * that case the output string is not allocated.  The contents of *@outs and
367 * *@outs_size are then undefined.
368 *
369 * FIXME: Do we want UTF_SFM_CONVERSIONS, i.e. Services For Macintosh,
370 * unconditionally or flexibly by mount option?  And what about the SFU
371 * (Services For Unix) conversions?  Should we use those instead or perhaps
372 * allow a mount option to decide which conversions to use?
373 */
374signed ntfs_to_utf8(const ntfs_volume *vol, const ntfschar *ins,
375		const size_t ins_size, u8 **outs, size_t *outs_size)
376{
377	u8 *utf8;
378	size_t utf8_size, res_size;
379	errno_t err;
380
381	if (*outs) {
382		utf8 = *outs;
383		utf8_size = *outs_size;
384	} else {
385		/*
386		 * Calculate the length of the decomposed utf8 string.  Add one
387		 * for the NUL terminator.
388		 */
389		utf8_size = utf8_encodelen(ins, ins_size, 0, UTF_DECOMPOSED |
390				UTF_LITTLE_ENDIAN | UTF_SFM_CONVERSIONS) + 1;
391		/* Allocate buffer for the converted string. */
392		utf8 = OSMalloc(utf8_size, ntfs_malloc_tag);
393		if (!utf8) {
394			ntfs_error(vol->mp, "Failed to allocate memory for "
395					"output string.");
396			return -ENOMEM;
397		}
398	}
399	ntfs_debug("Input string size in bytes %lu (NTFS), %lu (decomposed "
400			"UTF-8, including NUL terminator).",
401			(unsigned long)ins_size, (unsigned long)utf8_size);
402	/*
403	 * Convert the input string to decomposed utf-8 (NUL terminated).
404	 * FIXME: The "0" causes any occurences of the slash '/' character to
405	 * be converted to the underscore '_' character.  We could specify
406	 * some other unused character so that that can be mapped back to '/'
407	 * when converting back to NTFS string format.
408	 */
409	err = utf8_encodestr(ins, ins_size, utf8, &res_size, utf8_size, 0,
410			UTF_DECOMPOSED | UTF_LITTLE_ENDIAN |
411			UTF_SFM_CONVERSIONS);
412	if (err) {
413		ntfs_error(vol->mp, "Failed to convert NTFS input string to "
414				"decomposed, NUL terminated, UTF-8 string "
415				"(error %d).", (int)err);
416		goto err;
417	}
418	if (!*outs) {
419		if (res_size + 1 != utf8_size) {
420			ntfs_error(vol->mp, "res_size (%lu) + 1 != utf8_size "
421					"(%lu)", (unsigned long)res_size,
422					(unsigned long)utf8_size);
423			err = EILSEQ;
424			goto err;
425		}
426		*outs = utf8;
427		*outs_size = utf8_size;
428	}
429	ntfs_debug("Converted string size in bytes %lu (decomposed UTF-8).",
430			(unsigned long)res_size);
431	return res_size;
432err:
433	if (!*outs)
434		OSFree(utf8, utf8_size, ntfs_malloc_tag);
435	return -err;
436}
437
438/**
439 * ntfs_upcase_table_generate - generate the NTFS upcase table
440 * @uc:		destination buffer in which to generate the upcase table
441 * @uc_size:	size of the destination buffer in bytes
442 *
443 * Generate the full, 16-bit, little endian NTFS Unicode upcase table as used
444 * by Windows Vista.
445 *
446 * @uc_size must be able to at least hold the full, 16-bit Unicode upcase
447 * table, i.e. 2^16 * sizeof(ntfschar) = 64ki * 2 bytes = 128kiB.
448 */
449void ntfs_upcase_table_generate(ntfschar *uc, int uc_size)
450{
451	int i, r;
452	/*
453	 * "Start" is inclusive and "End" is exclusive, every value has the
454	 * value of "Add" added to it.
455	 */
456	static int add[][3] = { /* Start, End, Add */
457	{0x0061, 0x007b,   -32}, {0x00e0, 0x00f7,  -32}, {0x00f8, 0x00ff, -32},
458	{0x0256, 0x0258,  -205}, {0x028a, 0x028c, -217}, {0x037b, 0x037e, 130},
459	{0x03ac, 0x03ad,   -38}, {0x03ad, 0x03b0,  -37}, {0x03b1, 0x03c2, -32},
460	{0x03c2, 0x03c3,   -31}, {0x03c3, 0x03cc,  -32}, {0x03cc, 0x03cd, -64},
461	{0x03cd, 0x03cf,   -63}, {0x0430, 0x0450,  -32}, {0x0450, 0x0460, -80},
462	{0x0561, 0x0587,   -48}, {0x1f00, 0x1f08,    8}, {0x1f10, 0x1f16,   8},
463	{0x1f20, 0x1f28,     8}, {0x1f30, 0x1f38,    8}, {0x1f40, 0x1f46,   8},
464	{0x1f51, 0x1f52,     8}, {0x1f53, 0x1f54,    8}, {0x1f55, 0x1f56,   8},
465	{0x1f57, 0x1f58,     8}, {0x1f60, 0x1f68,    8}, {0x1f70, 0x1f72,  74},
466	{0x1f72, 0x1f76,    86}, {0x1f76, 0x1f78,  100}, {0x1f78, 0x1f7a, 128},
467	{0x1f7a, 0x1f7c,   112}, {0x1f7c, 0x1f7e,  126}, {0x1f80, 0x1f88,   8},
468	{0x1f90, 0x1f98,     8}, {0x1fa0, 0x1fa8,    8}, {0x1fb0, 0x1fb2,   8},
469	{0x1fb3, 0x1fb4,     9}, {0x1fcc, 0x1fcd,   -9}, {0x1fd0, 0x1fd2,   8},
470	{0x1fe0, 0x1fe2,     8}, {0x1fe5, 0x1fe6,    7}, {0x1ffc, 0x1ffd,  -9},
471	{0x2170, 0x2180,   -16}, {0x24d0, 0x24ea,  -26}, {0x2c30, 0x2c5f, -48},
472	{0x2d00, 0x2d26, -7264}, {0xff41, 0xff5b,  -32}, {0}
473	};
474	/*
475	 * "Start" is exclusive and "End" is inclusive, every second value is
476	 * decremented by one.
477	 */
478	static int skip_dec[][2] = { /* Start, End */
479	{0x0100, 0x012f}, {0x0132, 0x0137}, {0x0139, 0x0149}, {0x014a, 0x0178},
480	{0x0179, 0x017e}, {0x01a0, 0x01a6}, {0x01b3, 0x01b7}, {0x01cd, 0x01dd},
481	{0x01de, 0x01ef}, {0x01f4, 0x01f5}, {0x01f8, 0x01f9}, {0x01fa, 0x0220},
482	{0x0222, 0x0234}, {0x023b, 0x023c}, {0x0241, 0x0242}, {0x0246, 0x024f},
483	{0x03d8, 0x03ef}, {0x03f7, 0x03f8}, {0x03fa, 0x03fb}, {0x0460, 0x0481},
484	{0x048a, 0x04bf}, {0x04c1, 0x04c4}, {0x04c5, 0x04c8}, {0x04c9, 0x04ce},
485	{0x04ec, 0x04ed}, {0x04d0, 0x04eb}, {0x04ee, 0x04f5}, {0x04f6, 0x0513},
486	{0x1e00, 0x1e95}, {0x1ea0, 0x1ef9}, {0x2183, 0x2184}, {0x2c60, 0x2c61},
487	{0x2c67, 0x2c6c}, {0x2c75, 0x2c76}, {0x2c80, 0x2ce3}, {0}
488	};
489	/*
490	 * Set the Unicode character at offset "Offset" to "Value".  Note,
491	 * "Value" is host endian.
492	 */
493	static int set[][2] = { /* Offset, Value */
494	{0x00ff, 0x0178}, {0x0180, 0x0243}, {0x0183, 0x0182}, {0x0185, 0x0184},
495	{0x0188, 0x0187}, {0x018c, 0x018b}, {0x0192, 0x0191}, {0x0195, 0x01f6},
496	{0x0199, 0x0198}, {0x019a, 0x023d}, {0x019e, 0x0220}, {0x01a8, 0x01a7},
497	{0x01ad, 0x01ac}, {0x01b0, 0x01af}, {0x01b9, 0x01b8}, {0x01bd, 0x01bc},
498	{0x01bf, 0x01f7}, {0x01c6, 0x01c4}, {0x01c9, 0x01c7}, {0x01cc, 0x01ca},
499	{0x01dd, 0x018e}, {0x01f3, 0x01f1}, {0x023a, 0x2c65}, {0x023e, 0x2c66},
500	{0x0253, 0x0181}, {0x0254, 0x0186}, {0x0259, 0x018f}, {0x025b, 0x0190},
501	{0x0260, 0x0193}, {0x0263, 0x0194}, {0x0268, 0x0197}, {0x0269, 0x0196},
502	{0x026b, 0x2c62}, {0x026f, 0x019c}, {0x0272, 0x019d}, {0x0275, 0x019f},
503	{0x027d, 0x2c64}, {0x0280, 0x01a6}, {0x0283, 0x01a9}, {0x0288, 0x01ae},
504	{0x0289, 0x0244}, {0x028c, 0x0245}, {0x0292, 0x01b7}, {0x03f2, 0x03f9},
505	{0x04cf, 0x04c0}, {0x1d7d, 0x2c63}, {0x214e, 0x2132}, {0}
506	};
507
508	bzero(uc, uc_size);
509	uc_size /= sizeof(ntfschar);
510	/* Start with a one-to-one mapping, i.e. no upcasing happens at all. */
511	for (i = 0; i < uc_size; i++)
512		uc[i] = cpu_to_le16(i);
513	/* Adjust specified runs by the specified amount. */
514	for (r = 0; add[r][0]; r++)
515		for (i = add[r][0]; i < add[r][1]; i++)
516			uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) + add[r][2]);
517	/* Decrement every second value in specified runs. */
518	for (r = 0; skip_dec[r][0]; r++)
519		for (i = skip_dec[r][0]; i < skip_dec[r][1];
520				i += 2)
521			uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
522	/* Set specified characters to specified values. */
523	for (r = 0; set[r][0]; r++)
524		uc[set[r][0]] = cpu_to_le16(set[r][1]);
525}
526