1/*
2 * Various routines from the OSTA 2.01 specs.  Copyrights are included with
3 * each code segment.  Slight whitespace modifications have been made for
4 * formatting purposes.  Typos/bugs have been fixed.
5 */
6
#include <fs/udf/osta.h>

#ifdef MAIN
#include <stdio.h>
#endif
8
9/*****************************************************************************/
10/*-
11 **********************************************************************
12 * OSTA compliant Unicode compression, uncompression routines.
13 * Copyright 1995 Micro Design International, Inc.
14 * Written by Jason M. Rinn.
15 * Micro Design International gives permission for the free use of the
16 * following source code.
17 */
18
19/***********************************************************************
20 * Takes an OSTA CS0 compressed unicode name, and converts
21 * it to Unicode.
22 * The Unicode output will be in the byte order
23 * that the local compiler uses for 16-bit values.
24 * NOTE: This routine only performs error checking on the compID.
25 * It is up to the user to ensure that the unicode buffer is large
26 * enough, and that the compressed unicode name is correct.
27 *
28 * RETURN VALUE
29 *
30 * The number of unicode characters which were uncompressed.
31 * A -1 is returned if the compression ID is invalid.
32 */
33int
34udf_UncompressUnicode(
35	int numberOfBytes,	/* (Input) number of bytes read from media. */
36	byte *UDFCompressed,	/* (Input) bytes read from media. */
37	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
38{
39	unsigned int compID;
40	int returnValue, unicodeIndex, byteIndex;
41
42	/* Use UDFCompressed to store current byte being read. */
43	compID = UDFCompressed[0];
44
45	/* First check for valid compID. */
46	if (compID != 8 && compID != 16) {
47		returnValue = -1;
48	} else {
49		unicodeIndex = 0;
50		byteIndex = 1;
51
52		/* Loop through all the bytes. */
53		while (byteIndex < numberOfBytes) {
54			if (compID == 16) {
55				/* Move the first byte to the high bits of the
56				 * unicode char.
57				 */
58				unicode[unicodeIndex] =
59				    UDFCompressed[byteIndex++] << 8;
60			} else {
61				unicode[unicodeIndex] = 0;
62			}
63			if (byteIndex < numberOfBytes) {
64				/*Then the next byte to the low bits. */
65				unicode[unicodeIndex] |=
66				    UDFCompressed[byteIndex++];
67			}
68			unicodeIndex++;
69		}
70		returnValue = unicodeIndex;
71	}
72	return(returnValue);
73}
74
75/*
76 * Almost same as udf_UncompressUnicode(). The difference is that
77 * it keeps byte order of unicode string.
78 */
79int
80udf_UncompressUnicodeByte(
81	int numberOfBytes,	/* (Input) number of bytes read from media. */
82	byte *UDFCompressed,	/* (Input) bytes read from media. */
83	byte *unicode)		/* (Output) uncompressed unicode characters. */
84{
85	unsigned int compID;
86	int returnValue, unicodeIndex, byteIndex;
87
88	/* Use UDFCompressed to store current byte being read. */
89	compID = UDFCompressed[0];
90
91	/* First check for valid compID. */
92	if (compID != 8 && compID != 16) {
93		returnValue = -1;
94	} else {
95		unicodeIndex = 0;
96		byteIndex = 1;
97
98		/* Loop through all the bytes. */
99		while (byteIndex < numberOfBytes) {
100			if (compID == 16) {
101				/* Move the first byte to the high bits of the
102				 * unicode char.
103				 */
104				unicode[unicodeIndex++] =
105				    UDFCompressed[byteIndex++];
106			} else {
107				unicode[unicodeIndex++] = 0;
108			}
109			if (byteIndex < numberOfBytes) {
110				/*Then the next byte to the low bits. */
111				unicode[unicodeIndex++] =
112				    UDFCompressed[byteIndex++];
113			}
114		}
115		returnValue = unicodeIndex;
116	}
117	return(returnValue);
118}
119
120/***********************************************************************
121 * DESCRIPTION:
122 * Takes a string of unicode wide characters and returns an OSTA CS0
123 * compressed unicode string. The unicode MUST be in the byte order of
124 * the compiler in order to obtain correct results. Returns an error
125 * if the compression ID is invalid.
126 *
127 * NOTE: This routine assumes the implementation already knows, by
128 * the local environment, how many bits are appropriate and
129 * therefore does no checking to test if the input characters fit
130 * into that number of bits or not.
131 *
132 * RETURN VALUE
133 *
134 * The total number of bytes in the compressed OSTA CS0 string,
135 * including the compression ID.
136 * A -1 is returned if the compression ID is invalid.
137 */
138int
139udf_CompressUnicode(
140	int numberOfChars,	/* (Input) number of unicode characters. */
141	int compID,		/* (Input) compression ID to be used. */
142	unicode_t *unicode,	/* (Input) unicode characters to compress. */
143	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
144{
145	int byteIndex, unicodeIndex;
146
147	if (compID != 8 && compID != 16) {
148		byteIndex = -1; /* Unsupported compression ID ! */
149	} else {
150		/* Place compression code in first byte. */
151		UDFCompressed[0] = compID;
152
153		byteIndex = 1;
154		unicodeIndex = 0;
155		while (unicodeIndex < numberOfChars) {
156			if (compID == 16) {
157				/* First, place the high bits of the char
158				 * into the byte stream.
159				 */
160				UDFCompressed[byteIndex++] =
161				    (unicode[unicodeIndex] & 0xFF00) >> 8;
162			}
163			/*Then place the low bits into the stream. */
164			UDFCompressed[byteIndex++] =
165			    unicode[unicodeIndex] & 0x00FF;
166			unicodeIndex++;
167		}
168	}
169	return(byteIndex);
170}
171
172/*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-at-a-time lookup table for the 16-bit CRC with generator
 * polynomial 010041 octal (0x1021), processed most significant bit
 * first.  Entry i is the CRC register after shifting the single byte i
 * through a zeroed register.  The table is read-only, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
210
211unsigned short
212udf_cksum(unsigned char *s, int n)
213{
214	unsigned short crc=0;
215
216	while (n-- > 0)
217		crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
218	return crc;
219}
220
221/* UNICODE Checksum */
222unsigned short
223udf_unicode_cksum(unsigned short *s, int n)
224{
225	unsigned short crc=0;
226
227	while (n-- > 0) {
228		/* Take high order byte first--corresponds to a big endian
229		 * byte stream.
230		 */
231		crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
232		crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
233	}
234	return crc;
235}
236
#ifdef MAIN
/*
 * Stand-alone self-test, built only when MAIN is defined.  It runs
 * udf_cksum() over a three byte sample and prints the result next to
 * the expected value 0x3299.
 */
unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, (int)sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return (0);
}
#endif
248
249/*****************************************************************************/
250#ifdef NEEDS_ISPRINT
251/*-
252 **********************************************************************
253 * OSTA UDF compliant file name translation routine for OS/2,
254 * Windows 95, Windows NT, Macintosh and UNIX.
255 * Copyright 1995 Micro Design International, Inc.
256 * Written by Jason M. Rinn.
257 * Micro Design International gives permission for the free use of the
258 * following source code.
259 */
260
261/***********************************************************************
262 * To use these routines with different operating systems.
263 *
264 * OS/2
265 * Define OS2
266 * Define MAXLEN = 254
267 *
268 * Windows 95
269 * Define WIN_95
270 * Define MAXLEN = 255
271 *
272 * Windows NT
273 * Define WIN_NT
274 * Define MAXLEN = 255
275 *
276 * Macintosh:
277 * Define APPLE_MAC.
278 * Define MAXLEN = 31.
279 *
280 * UNIX
281 * Define UNIX.
282 * Define MAXLEN as specified by unix version.
283 */
284
/* Character substituted for a run of illegal or non-printable characters. */
#define	ILLEGAL_CHAR_MARK	0x005F
/* Character written in front of the four hex digits of the CRC. */
#define	CRC_MARK	0x0023
/* Most characters that may follow a period and still count as an extension. */
#define	EXT_SIZE	5
/* Boolean values used for the flags in the routines below. */
#define	TRUE	1
#define	FALSE	0
/* Code points compared against when looking for extensions and trailers. */
#define	PERIOD	0x002E
#define	SPACE	0x0020
292
/*** PROTOTYPES ***/
/* Non-zero if ch is illegal in a file name on the selected OS; defined
 * further down in this file.
 */
int IsIllegal(unicode_t ch);

/* Define a function or macro which determines if a Unicode character is
 * printable under your implementation.  It is not defined in this file.
 * Callers here treat a zero result as "not displayable".
 */
int UnicodeIsPrint(unicode_t);
300
301/***********************************************************************
302 * Translates a long file name to one using a MAXLEN and an illegal
303 * char set in accord with the OSTA requirements. Assumes the name has
304 * already been translated to Unicode.
305 *
306 * RETURN VALUE
307 *
308 * Number of unicode characters in translated name.
309 */
310int UDFTransName(
311	unicode_t *newName,	/* (Output)Translated name. Must be of length
312				 * MAXLEN */
313	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
314	int udfLen)		/* (Input) Length of UDF Name. */
315{
316	int index, newIndex = 0, needsCRC = FALSE;
317	int extIndex = 0, newExtIndex = 0, hasExt = FALSE;
318#if defined OS2 || defined WIN_95 || defined WIN_NT
319	int trailIndex = 0;
320#endif
321	unsigned short valueCRC;
322	unicode_t current;
323	const char hexChar[] = "0123456789ABCDEF";
324
325	for (index = 0; index < udfLen; index++) {
326		current = udfName[index];
327
328		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
329			needsCRC = TRUE;
330			/* Replace Illegal and non-displayable chars with
331			 * underscore.
332			 */
333			current = ILLEGAL_CHAR_MARK;
334			/* Skip any other illegal or non-displayable
335			 * characters.
336			 */
337			while(index+1 < udfLen && (IsIllegal(udfName[index+1])
338			    || !UnicodeIsPrint(udfName[index+1]))) {
339				index++;
340			}
341		}
342
343		/* Record position of extension, if one is found. */
344		if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) {
345			if (udfLen == index + 1) {
346				/* A trailing period is NOT an extension. */
347				hasExt = FALSE;
348			} else {
349				hasExt = TRUE;
350				extIndex = index;
351				newExtIndex = newIndex;
352			}
353		}
354
355#if defined OS2 || defined WIN_95 || defined WIN_NT
356		/* Record position of last char which is NOT period or space. */
357		else if (current != PERIOD && current != SPACE) {
358			trailIndex = newIndex;
359		}
360#endif
361
362		if (newIndex < MAXLEN) {
363			newName[newIndex++] = current;
364		} else {
365			needsCRC = TRUE;
366		}
367	}
368
369#if defined OS2 || defined WIN_95 || defined WIN_NT
370	/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
371	if (trailIndex != newIndex - 1) {
372		newIndex = trailIndex + 1;
373		needsCRC = TRUE;
374		hasExt = FALSE; /* Trailing period does not make an
375				 * extension. */
376	}
377#endif
378
379	if (needsCRC) {
380		unicode_t ext[EXT_SIZE];
381		int localExtIndex = 0;
382		if (hasExt) {
383			int maxFilenameLen;
384			/* Translate extension, and store it in ext. */
385			for(index = 0; index<EXT_SIZE &&
386			    extIndex + index +1 < udfLen; index++ ) {
387				current = udfName[extIndex + index + 1];
388				if (IsIllegal(current) ||
389				    !UnicodeIsPrint(current)) {
390					needsCRC = 1;
391					/* Replace Illegal and non-displayable
392					 * chars with underscore.
393					 */
394					current = ILLEGAL_CHAR_MARK;
395					/* Skip any other illegal or
396					 * non-displayable characters.
397					 */
398					while(index + 1 < EXT_SIZE
399					    && (IsIllegal(udfName[extIndex +
400					    index + 2]) ||
401					    !isprint(udfName[extIndex +
402					    index + 2]))) {
403						index++;
404					}
405				}
406				ext[localExtIndex++] = current;
407			}
408
409			/* Truncate filename to leave room for extension and
410			 * CRC.
411			 */
412			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
413			if (newIndex > maxFilenameLen) {
414				newIndex = maxFilenameLen;
415			} else {
416				newIndex = newExtIndex;
417			}
418		} else if (newIndex > MAXLEN - 5) {
419			/*If no extension, make sure to leave room for CRC. */
420			newIndex = MAXLEN - 5;
421		}
422		newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
423
424		/*Calculate CRC from original filename from FileIdentifier. */
425		valueCRC = udf_unicode_cksum(udfName, udfLen);
426		/* Convert 16-bits of CRC to hex characters. */
427		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
428		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
429		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
430		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
431
432		/* Place a translated extension at end, if found. */
433		if (hasExt) {
434			newName[newIndex++] = PERIOD;
435			for (index = 0;index < localExtIndex ;index++ ) {
436				newName[newIndex++] = ext[index];
437			}
438		}
439	}
440	return(newIndex);
441}
442
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Reports whether a Unicode character equals any character of a
 * NUL-terminated string of single-byte characters.
 * Used by the OS/2 and Windows version of IsIllegal for readability,
 * since all of the illegal characters above 0x0020 are in the ASCII
 * subset of Unicode.
 * Works very similarly to the standard C function strchr(), except
 * that the terminating NUL itself is never matched.
 *
 * RETURN VALUE
 *
 * TRUE if the Unicode character is in the given string, else FALSE.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	const unsigned char *p;

	for (p = string; *p != '\0'; p++) {
		/* Both operands are unsigned, so they compare directly. */
		if (*p == ch) {
			return(TRUE);
		}
	}
	return(FALSE);
}
#endif /* OS2 || WIN_95 || WIN_NT */
471
472/***********************************************************************
473 * Decides whether the given character is illegal for a given OS.
474 *
475 * RETURN VALUE
476 *
477 * Non-zero if char is illegal.
478 */
479int IsIllegal(unicode_t ch)
480{
481#ifdef APPLE_MAC
482	/* Only illegal character on the MAC is the colon. */
483	if (ch == 0x003A) {
484		return(1);
485	} else {
486		return(0);
487	}
488
489#elif defined UNIX
490	/* Illegal UNIX characters are NULL and slash. */
491	if (ch == 0x0000 || ch == 0x002F) {
492		return(1);
493	} else {
494		return(0);
495	}
496
497#elif defined OS2 || defined WIN_95 || defined WIN_NT
498	/* Illegal char's for OS/2 according to WARP toolkit. */
499	if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
500		return(1);
501	} else {
502		return(0);
503	}
504#endif
505}
506#endif
507