1/* $NetBSD: udf_osta.c,v 1.11 2022/02/11 16:33:18 reinoud Exp $ */ 2#if HAVE_NBTOOL_CONFIG_H 3#include "nbtool_config.h" 4#endif 5 6#include <sys/cdefs.h> 7__KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.11 2022/02/11 16:33:18 reinoud Exp $"); 8 9/* 10 * Various routines from the OSTA 2.01 specs. Copyrights are included with 11 * each code segment. Slight whitespace modifications have been made for 12 * formatting purposes. Typos/bugs have been fixed. 13 * 14 */ 15 16#include "udf_osta.h" 17 18#ifndef _KERNEL 19#include <ctype.h> 20#endif 21 22/*****************************************************************************/ 23/*********************************************************************** 24 * OSTA compliant Unicode compression, uncompression routines. 25 * Copyright 1995 Micro Design International, Inc. 26 * Written by Jason M. Rinn. 27 * Micro Design International gives permission for the free use of the 28 * following source code. 29 */ 30 31/*********************************************************************** 32 * Takes an OSTA CS0 compressed unicode name, and converts 33 * it to Unicode. 34 * The Unicode output will be in the byte order 35 * that the local compiler uses for 16-bit values. 36 * NOTE: This routine only performs error checking on the compID. 37 * It is up to the user to ensure that the unicode buffer is large 38 * enough, and that the compressed unicode name is correct. 39 * 40 * RETURN VALUE 41 * 42 * The number of unicode characters which were uncompressed. 43 * A -1 is returned if the compression ID is invalid. 44 */ 45int 46udf_UncompressUnicode( 47 int numberOfBytes, /* (Input) number of bytes read from media. */ 48 byte *UDFCompressed, /* (Input) bytes read from media. */ 49 unicode_t *unicode) /* (Output) uncompressed unicode characters. */ 50{ 51 unsigned int compID; 52 int returnValue, unicodeIndex, byteIndex; 53 54 /* Use UDFCompressed to store current byte being read. */ 55 compID = UDFCompressed[0]; 56 57 /* Translate 254/255 compID values used for deleted entries */ 58 if (compID == 254) 59 compID = 8; 60 if (compID == 255) 61 compID = 16; 62 63 /* First check for valid compID. */ 64 if (compID != 8 && compID != 16) { 65 returnValue = -1; 66 } else { 67 unicodeIndex = 0; 68 byteIndex = 1; 69 70 /* Loop through all the bytes. */ 71 while (byteIndex < numberOfBytes) { 72 if (compID == 16) { 73 /* Move the first byte to the high bits of the 74 * unicode char. 75 */ 76 unicode[unicodeIndex] = 77 UDFCompressed[byteIndex++] << 8; 78 } else { 79 unicode[unicodeIndex] = 0; 80 } 81 if (byteIndex < numberOfBytes) { 82 /*Then the next byte to the low bits. */ 83 unicode[unicodeIndex] |= 84 UDFCompressed[byteIndex++]; 85 } 86 unicodeIndex++; 87 } 88 returnValue = unicodeIndex; 89 } 90 return(returnValue); 91} 92 93/*********************************************************************** 94 * DESCRIPTION: 95 * Takes a string of unicode wide characters and returns an OSTA CS0 96 * compressed unicode string. The unicode MUST be in the byte order of 97 * the compiler in order to obtain correct results. Returns an error 98 * if the compression ID is invalid. 99 * 100 * NOTE: This routine assumes the implementation already knows, by 101 * the local environment, how many bits are appropriate and 102 * therefore does no checking to test if the input characters fit 103 * into that number of bits or not. 104 * 105 * RETURN VALUE 106 * 107 * The total number of bytes in the compressed OSTA CS0 string, 108 * including the compression ID. 109 * A -1 is returned if the compression ID is invalid. 110 */ 111int 112udf_CompressUnicode( 113 int numberOfChars, /* (Input) number of unicode characters. */ 114 int compID, /* (Input) compression ID to be used. */ 115 unicode_t *unicode, /* (Input) unicode characters to compress. */ 116 byte *UDFCompressed) /* (Output) compressed string, as bytes. */ 117{ 118 int byteIndex, unicodeIndex; 119 120 if (compID != 8 && compID != 16) { 121 byteIndex = -1; /* Unsupported compression ID ! */ 122 } else { 123 /* Place compression code in first byte. */ 124 UDFCompressed[0] = compID; 125 126 byteIndex = 1; 127 unicodeIndex = 0; 128 while (unicodeIndex < numberOfChars) { 129 if (compID == 16) { 130 /* First, place the high bits of the char 131 * into the byte stream. 132 */ 133 UDFCompressed[byteIndex++] = 134 (unicode[unicodeIndex] & 0xFF00) >> 8; 135 } 136 /*Then place the low bits into the stream. */ 137 UDFCompressed[byteIndex++] = 138 unicode[unicodeIndex] & 0x00FF; 139 unicodeIndex++; 140 } 141 } 142 return(byteIndex); 143} 144 145/*****************************************************************************/ 146/* 147 * CRC 010041 148 */ 149static unsigned short crc_table[256] = { 150 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, 151 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 152 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, 153 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, 154 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, 155 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, 156 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, 157 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 158 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, 159 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, 160 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, 161 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, 162 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, 163 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 164 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, 165 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, 166 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, 167 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, 168 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, 169 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, 170 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, 171 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 172 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, 173 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, 174 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, 175 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, 176 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, 177 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, 178 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 179 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, 180 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, 181 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 182}; 183 184unsigned short 185udf_cksum(unsigned char *s, int n) 186{ 187 unsigned short crc=0; 188 189 while (n-- > 0) 190 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8); 191 return crc; 192} 193 194/* UNICODE Checksum */ 195unsigned short 196udf_unicode_cksum(unsigned short *s, int n) 197{ 198 unsigned short crc=0; 199 200 while (n-- > 0) { 201 /* Take high order byte first--corresponds to a big endian 202 * byte stream. 203 */ 204 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8); 205 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8); 206 } 207 return crc; 208} 209 210 211/* 212 * Calculates a 16-bit checksum of the Implementation Use 213 * Extended Attribute header or Application Use Extended Attribute 214 * header. The fields AttributeType through ImplementationIdentifier 215 * (or ApplicationIdentifier) inclusively represent the 216 * data covered by the checksum (48 bytes). 217 * 218 */ 219uint16_t udf_ea_cksum(uint8_t *data) { 220 uint16_t checksum = 0; 221 int count; 222 223 for (count = 0; count < 48; count++) { 224 checksum += *data++; 225 } 226 227 return checksum; 228} 229 230 231#ifdef MAIN 232unsigned char bytes[] = { 0x70, 0x6A, 0x77 }; 233 234main(void) 235{ 236 unsigned short x; 237 x = cksum(bytes, sizeof bytes); 238 printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299); 239 exit(0); 240} 241#endif 242 243/*****************************************************************************/ 244/* #ifdef NEEDS_ISPRINT */ 245/*********************************************************************** 246 * OSTA UDF compliant file name translation routine for OS/2, 247 * Windows 95, Windows NT, Macintosh and UNIX. 248 * Copyright 1995 Micro Design International, Inc. 249 * Written by Jason M. Rinn. 250 * Micro Design International gives permission for the free use of the 251 * following source code. 252 */ 253 254/*********************************************************************** 255 * To use these routines with different operating systems. 256 * 257 * OS/2 258 * Define OS2 259 * Define MAXLEN = 254 260 * 261 * Windows 95 262 * Define WIN_95 263 * Define MAXLEN = 255 264 * 265 * Windows NT 266 * Define WIN_NT 267 * Define MAXLEN = 255 268 * 269 * Macintosh: 270 * Define MAC. 271 * Define MAXLEN = 31. 272 * 273 * UNIX 274 * Define UNIX. 275 * Define MAXLEN as specified by unix version. 276 */ 277 278#define ILLEGAL_CHAR_MARK 0x005F 279#define CRC_MARK 0x0023 280#define EXT_SIZE 5 281#define PERIOD 0x002E 282#define SPACE 0x0020 283 284/*** PROTOTYPES ***/ 285int IsIllegal(unicode_t ch); 286 287/* Define a function or macro which determines if a Unicode character is 288 * printable under your implementation. 289 */ 290 291 292/* #include <stdio.h> */ 293static int UnicodeIsPrint(unicode_t ch) { 294 return (ch >=' ') && (ch != 127); 295} 296 297 298int UnicodeLength(unicode_t *string) { 299 int length; 300 length = 0; 301 while (*string++) length++; 302 303 return length; 304} 305 306 307#ifdef _KERNEL 308static int isprint(int c) { 309 return (c >= ' ') && (c != 127); 310} 311#endif 312 313 314/*********************************************************************** 315 * Translates a long file name to one using a MAXLEN and an illegal 316 * char set in accord with the OSTA requirements. Assumes the name has 317 * already been translated to Unicode. 318 * 319 * RETURN VALUE 320 * 321 * Number of unicode characters in translated name. 322 */ 323int UDFTransName( 324 unicode_t *newName, /* (Output)Translated name. Must be of length 325 * MAXLEN */ 326 unicode_t *udfName, /* (Input) Name from UDF volume.*/ 327 int udfLen) /* (Input) Length of UDF Name. */ 328{ 329 int Index, newIndex = 0, needsCRC = false; /* index is shadowed */ 330 int extIndex = 0, newExtIndex = 0, hasExt = false; 331#if defined OS2 || defined WIN_95 || defined WIN_NT 332 int trailIndex = 0; 333#endif 334 unsigned short valueCRC; 335 unicode_t current; 336 const char hexChar[] = "0123456789ABCDEF"; 337 338 for (Index = 0; Index < udfLen; Index++) { 339 current = udfName[Index]; 340 341 if (IsIllegal(current) || !UnicodeIsPrint(current)) { 342 needsCRC = true; 343 /* Replace Illegal and non-displayable chars with 344 * underscore. 345 */ 346 current = ILLEGAL_CHAR_MARK; 347 /* Skip any other illegal or non-displayable 348 * characters. 349 */ 350 while(Index+1 < udfLen && (IsIllegal(udfName[Index+1]) 351 || !UnicodeIsPrint(udfName[Index+1]))) { 352 Index++; 353 } 354 } 355 356 /* Record position of extension, if one is found. */ 357 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) { 358 if (udfLen == Index + 1) { 359 /* A trailing period is NOT an extension. */ 360 hasExt = false; 361 } else { 362 hasExt = true; 363 extIndex = Index; 364 newExtIndex = newIndex; 365 } 366 } 367 368#if defined OS2 || defined WIN_95 || defined WIN_NT 369 /* Record position of last char which is NOT period or space. */ 370 else if (current != PERIOD && current != SPACE) { 371 trailIndex = newIndex; 372 } 373#endif 374 375 if (newIndex < MAXLEN) { 376 newName[newIndex++] = current; 377 } else { 378 needsCRC = true; 379 } 380 } 381 382#if defined OS2 || defined WIN_95 || defined WIN_NT 383 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */ 384 if (trailIndex != newIndex - 1) { 385 newIndex = trailIndex + 1; 386 needsCRC = true; 387 hasExt = false; /* Trailing period does not make an 388 * extension. */ 389 } 390#endif 391 392 if (needsCRC) { 393 unicode_t ext[EXT_SIZE]; 394 int localExtIndex = 0; 395 if (hasExt) { 396 int maxFilenameLen; 397 /* Translate extension, and store it in ext. */ 398 for(Index = 0; Index<EXT_SIZE && 399 extIndex + Index +1 < udfLen; Index++ ) { 400 current = udfName[extIndex + Index + 1]; 401 if (IsIllegal(current) || 402 !UnicodeIsPrint(current)) { 403 needsCRC = 1; 404 /* Replace Illegal and non-displayable 405 * chars with underscore. 406 */ 407 current = ILLEGAL_CHAR_MARK; 408 /* Skip any other illegal or 409 * non-displayable characters. 410 */ 411 while(Index + 1 < EXT_SIZE 412 && (IsIllegal(udfName[extIndex + 413 Index + 2]) || 414 !isprint(udfName[extIndex + 415 Index + 2]))) { 416 Index++; 417 } 418 } 419 ext[localExtIndex++] = current; 420 } 421 422 /* Truncate filename to leave room for extension and 423 * CRC. 424 */ 425 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1); 426 if (newIndex > maxFilenameLen) { 427 newIndex = maxFilenameLen; 428 } else { 429 newIndex = newExtIndex; 430 } 431 } else if (newIndex > MAXLEN - 5) { 432 /*If no extension, make sure to leave room for CRC. */ 433 newIndex = MAXLEN - 5; 434 } 435 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */ 436 437 /*Calculate CRC from original filename from FileIdentifier. */ 438 valueCRC = udf_unicode_cksum(udfName, udfLen); 439 /* Convert 16-bits of CRC to hex characters. */ 440 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; 441 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; 442 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; 443 newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; 444 445 /* Place a translated extension at end, if found. */ 446 if (hasExt) { 447 newName[newIndex++] = PERIOD; 448 for (Index = 0;Index < localExtIndex ;Index++ ) { 449 newName[newIndex++] = ext[Index]; 450 } 451 } 452 } 453 return(newIndex); 454} 455 456#if defined OS2 || defined WIN_95 || defined WIN_NT 457/*********************************************************************** 458 * Decides if a Unicode character matches one of a list 459 * of ASCII characters. 460 * Used by OS2 version of IsIllegal for readability, since all of the 461 * illegal characters above 0x0020 are in the ASCII subset of Unicode. 462 * Works very similarly to the standard C function strchr(). 463 * 464 * RETURN VALUE 465 * 466 * Non-zero if the Unicode character is in the given ASCII string. 467 */ 468int UnicodeInString( 469 unsigned char *string, /* (Input) String to search through. */ 470 unicode_t ch) /* (Input) Unicode char to search for. */ 471{ 472 int found = false; 473 while (*string != '\0' && found == false) { 474 /* These types should compare, since both are unsigned 475 * numbers. */ 476 if (*string == ch) { 477 found = true; 478 } 479 string++; 480 } 481 return(found); 482} 483#endif /* OS2 */ 484 485/*********************************************************************** 486 * Decides whether the given character is illegal for a given OS. 487 * 488 * RETURN VALUE 489 * 490 * Non-zero if char is illegal. 491 */ 492int IsIllegal(unicode_t ch) 493{ 494#ifdef MAC 495 /* Only illegal character on the MAC is the colon. */ 496 if (ch == 0x003A) { 497 return(1); 498 } else { 499 return(0); 500 } 501 502#elif defined UNIX 503 /* Illegal UNIX characters are NULL and slash. */ 504 if (ch == 0x0000 || ch == 0x002F) { 505 return(1); 506 } else { 507 return(0); 508 } 509 510#elif defined OS2 || defined WIN_95 || defined WIN_NT 511 /* Illegal char's for OS/2 according to WARP toolkit. */ 512 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) { 513 return(1); 514 } else { 515 return(0); 516 } 517#endif 518} 519/* #endif*/ /* NEEDS_ISPRINT */ 520 521