1/* $NetBSD: udf_osta.c,v 1.8 2009/03/14 21:04:24 dsl Exp $ */
2
3#include <sys/cdefs.h>
4#ifndef lint
5__KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.8 2009/03/14 21:04:24 dsl Exp $");
6#endif /* not lint */
7
8/*
9 * Various routines from the OSTA 2.01 specs.  Copyrights are included with
10 * each code segment.  Slight whitespace modifications have been made for
11 * formatting purposes.  Typos/bugs have been fixed.
12 *
13 */
14
15#include "udf_osta.h"
16
17#ifndef _KERNEL
18#include <ctype.h>
19#endif
20
21/*****************************************************************************/
22/***********************************************************************
23 * OSTA compliant Unicode compression, uncompression routines.
24 * Copyright 1995 Micro Design International, Inc.
25 * Written by Jason M. Rinn.
26 * Micro Design International gives permission for the free use of the
27 * following source code.
28 */
29
30/***********************************************************************
31 * Takes an OSTA CS0 compressed unicode name, and converts
32 * it to Unicode.
33 * The Unicode output will be in the byte order
34 * that the local compiler uses for 16-bit values.
35 * NOTE: This routine only performs error checking on the compID.
36 * It is up to the user to ensure that the unicode buffer is large
37 * enough, and that the compressed unicode name is correct.
38 *
39 * RETURN VALUE
40 *
41 * The number of unicode characters which were uncompressed.
42 * A -1 is returned if the compression ID is invalid.
43 */
44int
45udf_UncompressUnicode(
46	int numberOfBytes,	/* (Input) number of bytes read from media. */
47	byte *UDFCompressed,	/* (Input) bytes read from media. */
48	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
49{
50	unsigned int compID;
51	int returnValue, unicodeIndex, byteIndex;
52
53	/* Use UDFCompressed to store current byte being read. */
54	compID = UDFCompressed[0];
55
56	/* First check for valid compID. */
57	if (compID != 8 && compID != 16) {
58		returnValue = -1;
59	} else {
60		unicodeIndex = 0;
61		byteIndex = 1;
62
63		/* Loop through all the bytes. */
64		while (byteIndex < numberOfBytes) {
65			if (compID == 16) {
66				/* Move the first byte to the high bits of the
67				 * unicode char.
68				 */
69				unicode[unicodeIndex] =
70				    UDFCompressed[byteIndex++] << 8;
71			} else {
72				unicode[unicodeIndex] = 0;
73			}
74			if (byteIndex < numberOfBytes) {
75				/*Then the next byte to the low bits. */
76				unicode[unicodeIndex] |=
77				    UDFCompressed[byteIndex++];
78			}
79			unicodeIndex++;
80		}
81		returnValue = unicodeIndex;
82	}
83	return(returnValue);
84}
85
86/***********************************************************************
87 * DESCRIPTION:
88 * Takes a string of unicode wide characters and returns an OSTA CS0
89 * compressed unicode string. The unicode MUST be in the byte order of
90 * the compiler in order to obtain correct results. Returns an error
91 * if the compression ID is invalid.
92 *
93 * NOTE: This routine assumes the implementation already knows, by
94 * the local environment, how many bits are appropriate and
95 * therefore does no checking to test if the input characters fit
96 * into that number of bits or not.
97 *
98 * RETURN VALUE
99 *
100 * The total number of bytes in the compressed OSTA CS0 string,
101 * including the compression ID.
102 * A -1 is returned if the compression ID is invalid.
103 */
104int
105udf_CompressUnicode(
106	int numberOfChars,	/* (Input) number of unicode characters. */
107	int compID,		/* (Input) compression ID to be used. */
108	unicode_t *unicode,	/* (Input) unicode characters to compress. */
109	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
110{
111	int byteIndex, unicodeIndex;
112
113	if (compID != 8 && compID != 16) {
114		byteIndex = -1; /* Unsupported compression ID ! */
115	} else {
116		/* Place compression code in first byte. */
117		UDFCompressed[0] = compID;
118
119		byteIndex = 1;
120		unicodeIndex = 0;
121		while (unicodeIndex < numberOfChars) {
122			if (compID == 16) {
123				/* First, place the high bits of the char
124				 * into the byte stream.
125				 */
126				UDFCompressed[byteIndex++] =
127				    (unicode[unicodeIndex] & 0xFF00) >> 8;
128			}
129			/*Then place the low bits into the stream. */
130			UDFCompressed[byteIndex++] =
131			    unicode[unicodeIndex] & 0x00FF;
132			unicodeIndex++;
133		}
134	}
135	return(byteIndex);
136}
137
138/*****************************************************************************/
/*
 * CRC 010041
 *
 * Lookup table for the 16-bit CRC with generator polynomial 0x1021
 * (010041 in octal).  Entry i is the CRC register after feeding the
 * single byte i, most significant bit first, into a zero register.
 * The table is read-only, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
176
177unsigned short
178udf_cksum(unsigned char *s, int n)
179{
180	unsigned short crc=0;
181
182	while (n-- > 0)
183		crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
184	return crc;
185}
186
187/* UNICODE Checksum */
188unsigned short
189udf_unicode_cksum(unsigned short *s, int n)
190{
191	unsigned short crc=0;
192
193	while (n-- > 0) {
194		/* Take high order byte first--corresponds to a big endian
195		 * byte stream.
196		 */
197		crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
198		crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
199	}
200	return crc;
201}
202
203
/*
 * Calculates a 16-bit checksum of the Implementation Use
 * Extended Attribute header or Application Use Extended Attribute
 * header.  The checksum is the plain sum of the first 48 bytes at
 * `data', which cover the fields AttributeType through
 * ImplementationIdentifier (or ApplicationIdentifier) inclusively.
 */
uint16_t
udf_ea_cksum(uint8_t *data)
{
	enum { EA_CKSUM_BYTES = 48 };	/* bytes covered by the checksum */
	uint16_t sum = 0;
	int i;

	for (i = 0; i < EA_CKSUM_BYTES; i++)
		sum += data[i];

	return sum;
}
222
223
#ifdef MAIN
/*
 * Stand-alone self test, only built when MAIN is defined.  It runs
 * udf_cksum() over a three byte sample and prints the result next to
 * the expected value 0x3299.
 *
 * <stdio.h> is pulled in here rather than at the top of the file so
 * that regular (kernel) builds are not affected.
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n", x, 0x3299);
	return 0;
}
#endif
235
236/*****************************************************************************/
237/* #ifdef NEEDS_ISPRINT */
238/***********************************************************************
239 * OSTA UDF compliant file name translation routine for OS/2,
240 * Windows 95, Windows NT, Macintosh and UNIX.
241 * Copyright 1995 Micro Design International, Inc.
242 * Written by Jason M. Rinn.
243 * Micro Design International gives permission for the free use of the
244 * following source code.
245 */
246
247/***********************************************************************
248 * To use these routines with different operating systems.
249 *
250 * OS/2
251 * Define OS2
252 * Define MAXLEN = 254
253 *
254 * Windows 95
255 * Define WIN_95
256 * Define MAXLEN = 255
257 *
258 * Windows NT
259 * Define WIN_NT
260 * Define MAXLEN = 255
261 *
262 * Macintosh:
263 * Define MAC.
264 * Define MAXLEN = 31.
265 *
266 * UNIX
267 * Define UNIX.
268 * Define MAXLEN as specified by unix version.
269 */
270
/* Unicode code points and sizes used while translating a file name. */
#define	ILLEGAL_CHAR_MARK	0x005F	/* '_': stands in for a run of
					 * illegal or unprintable chars */
#define	CRC_MARK		0x0023	/* '#': introduces the hex CRC */
#define	EXT_SIZE		5	/* longest extension, in chars, that
					 * is still treated as an extension */
#define	PERIOD			0x002E	/* '.' */
#define	SPACE			0x0020	/* ' ' */
276
277/*** PROTOTYPES ***/
278int IsIllegal(unicode_t ch);
279
280/* Define a function or macro which determines if a Unicode character is
281 * printable under your implementation.
282 */
283
284
285/* #include <stdio.h> */
286static int UnicodeIsPrint(unicode_t ch) {
287	return (ch >=' ') && (ch != 127);
288}
289
290
291int UnicodeLength(unicode_t *string) {
292	int length;
293	length = 0;
294	while (*string++) length++;
295
296	return length;
297}
298
299
300#ifdef _KERNEL
301static int isprint(int c) {
302	return (c >= ' ') && (c != 127);
303}
304#endif
305
306
307/***********************************************************************
308 * Translates a long file name to one using a MAXLEN and an illegal
309 * char set in accord with the OSTA requirements. Assumes the name has
310 * already been translated to Unicode.
311 *
312 * RETURN VALUE
313 *
314 * Number of unicode characters in translated name.
315 */
316int UDFTransName(
317	unicode_t *newName,	/* (Output)Translated name. Must be of length
318				 * MAXLEN */
319	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
320	int udfLen)		/* (Input) Length of UDF Name. */
321{
322	int Index, newIndex = 0, needsCRC = false;	/* index is shadowed */
323	int extIndex = 0, newExtIndex = 0, hasExt = false;
324#if defined OS2 || defined WIN_95 || defined WIN_NT
325	int trailIndex = 0;
326#endif
327	unsigned short valueCRC;
328	unicode_t current;
329	const char hexChar[] = "0123456789ABCDEF";
330
331	for (Index = 0; Index < udfLen; Index++) {
332		current = udfName[Index];
333
334		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
335			needsCRC = true;
336			/* Replace Illegal and non-displayable chars with
337			 * underscore.
338			 */
339			current = ILLEGAL_CHAR_MARK;
340			/* Skip any other illegal or non-displayable
341			 * characters.
342			 */
343			while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
344			    || !UnicodeIsPrint(udfName[Index+1]))) {
345				Index++;
346			}
347		}
348
349		/* Record position of extension, if one is found. */
350		if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
351			if (udfLen == Index + 1) {
352				/* A trailing period is NOT an extension. */
353				hasExt = false;
354			} else {
355				hasExt = true;
356				extIndex = Index;
357				newExtIndex = newIndex;
358			}
359		}
360
361#if defined OS2 || defined WIN_95 || defined WIN_NT
362		/* Record position of last char which is NOT period or space. */
363		else if (current != PERIOD && current != SPACE) {
364			trailIndex = newIndex;
365		}
366#endif
367
368		if (newIndex < MAXLEN) {
369			newName[newIndex++] = current;
370		} else {
371			needsCRC = true;
372		}
373	}
374
375#if defined OS2 || defined WIN_95 || defined WIN_NT
376	/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
377	if (trailIndex != newIndex - 1) {
378		newIndex = trailIndex + 1;
379		needsCRC = true;
380		hasExt = false; /* Trailing period does not make an
381				 * extension. */
382	}
383#endif
384
385	if (needsCRC) {
386		unicode_t ext[EXT_SIZE];
387		int localExtIndex = 0;
388		if (hasExt) {
389			int maxFilenameLen;
390			/* Translate extension, and store it in ext. */
391			for(Index = 0; Index<EXT_SIZE &&
392			    extIndex + Index +1 < udfLen; Index++ ) {
393				current = udfName[extIndex + Index + 1];
394				if (IsIllegal(current) ||
395				    !UnicodeIsPrint(current)) {
396					needsCRC = 1;
397					/* Replace Illegal and non-displayable
398					 * chars with underscore.
399					 */
400					current = ILLEGAL_CHAR_MARK;
401					/* Skip any other illegal or
402					 * non-displayable characters.
403					 */
404					while(Index + 1 < EXT_SIZE
405					    && (IsIllegal(udfName[extIndex +
406					    Index + 2]) ||
407					    !isprint(udfName[extIndex +
408					    Index + 2]))) {
409						Index++;
410					}
411				}
412				ext[localExtIndex++] = current;
413			}
414
415			/* Truncate filename to leave room for extension and
416			 * CRC.
417			 */
418			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
419			if (newIndex > maxFilenameLen) {
420				newIndex = maxFilenameLen;
421			} else {
422				newIndex = newExtIndex;
423			}
424		} else if (newIndex > MAXLEN - 5) {
425			/*If no extension, make sure to leave room for CRC. */
426			newIndex = MAXLEN - 5;
427		}
428		newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
429
430		/*Calculate CRC from original filename from FileIdentifier. */
431		valueCRC = udf_unicode_cksum(udfName, udfLen);
432		/* Convert 16-bits of CRC to hex characters. */
433		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
434		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
435		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
436		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
437
438		/* Place a translated extension at end, if found. */
439		if (hasExt) {
440			newName[newIndex++] = PERIOD;
441			for (Index = 0;Index < localExtIndex ;Index++ ) {
442				newName[newIndex++] = ext[Index];
443			}
444		}
445	}
446	return(newIndex);
447}
448
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Looks for a Unicode character in a zero-terminated list of ASCII
 * characters, much like the standard C function strchr().
 * Used by the OS/2 and Windows flavour of IsIllegal for readability,
 * since all of the illegal characters above 0x0020 are in the ASCII
 * subset of Unicode.
 *
 * RETURN VALUE
 *
 * Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	unsigned char *p;

	for (p = string; *p != '\0'; p++) {
		/* Both sides are unsigned, so they compare directly. */
		if (*p == ch) {
			return(true);
		}
	}
	return(false);
}
#endif /* OS2 */
477
478/***********************************************************************
479 * Decides whether the given character is illegal for a given OS.
480 *
481 * RETURN VALUE
482 *
483 * Non-zero if char is illegal.
484 */
485int IsIllegal(unicode_t ch)
486{
487#ifdef MAC
488	/* Only illegal character on the MAC is the colon. */
489	if (ch == 0x003A) {
490		return(1);
491	} else {
492		return(0);
493	}
494
495#elif defined UNIX
496	/* Illegal UNIX characters are NULL and slash. */
497	if (ch == 0x0000 || ch == 0x002F) {
498		return(1);
499	} else {
500		return(0);
501	}
502
503#elif defined OS2 || defined WIN_95 || defined WIN_NT
504	/* Illegal char's for OS/2 according to WARP toolkit. */
505	if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
506		return(1);
507	} else {
508		return(0);
509	}
510#endif
511}
512/* #endif*/	/* NEEDS_ISPRINT */
513
514