readcdf.c revision 300899
1/*- 2 * Copyright (c) 2008 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26#include "file.h" 27 28#ifndef lint 29FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $") 30#endif 31 32#include <assert.h> 33#include <stdlib.h> 34#include <unistd.h> 35#include <string.h> 36#include <time.h> 37#include <ctype.h> 38 39#include "cdf.h" 40#include "magic.h" 41 42#ifndef __arraycount 43#define __arraycount(a) (sizeof(a) / sizeof(a[0])) 44#endif 45 46#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 47 48static const struct nv { 49 const char *pattern; 50 const char *mime; 51} app2mime[] = { 52 { "Word", "msword", }, 53 { "Excel", "vnd.ms-excel", }, 54 { "Powerpoint", "vnd.ms-powerpoint", }, 55 { "Crystal Reports", "x-rpt", }, 56 { "Advanced Installer", "vnd.ms-msi", }, 57 { "InstallShield", "vnd.ms-msi", }, 58 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 59 { "NAnt", "vnd.ms-msi", }, 60 { "Windows Installer", "vnd.ms-msi", }, 61 { NULL, NULL, }, 62}, name2mime[] = { 63 { "Book", "vnd.ms-excel", }, 64 { "Workbook", "vnd.ms-excel", }, 65 { "WordDocument", "msword", }, 66 { "PowerPoint", "vnd.ms-powerpoint", }, 67 { "DigitalSignature", "vnd.ms-msi", }, 68 { NULL, NULL, }, 69}, name2desc[] = { 70 { "Book", "Microsoft Excel", }, 71 { "Workbook", "Microsoft Excel", }, 72 { "WordDocument", "Microsoft Word", }, 73 { "PowerPoint", "Microsoft PowerPoint", }, 74 { "DigitalSignature", "Microsoft Installer", }, 75 { NULL, NULL, }, 76}; 77 78static const struct cv { 79 uint64_t clsid[2]; 80 const char *mime; 81} clsid2mime[] = { 82 { 83 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 84 "x-msi", 85 }, 86 { { 0, 0 }, 87 NULL, 88 }, 89}, clsid2desc[] = { 90 { 91 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 92 "MSI Installer", 93 }, 94 { { 0, 0 }, 95 NULL, 96 }, 97}; 98 99private const char * 100cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 101{ 102 size_t i; 103 for (i = 0; cv[i].mime != NULL; i++) { 104 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 105 return cv[i].mime; 106 } 107#ifdef CDF_DEBUG 108 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 109 clsid[1]); 110#endif 111 return NULL; 112} 113 114private const char * 115cdf_app_to_mime(const char *vbuf, const struct nv *nv) 116{ 117 size_t i; 118 const char *rv = NULL; 119#ifdef USE_C_LOCALE 120 locale_t old_lc_ctype, c_lc_ctype; 121 122 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 123 assert(c_lc_ctype != NULL); 124 old_lc_ctype = uselocale(c_lc_ctype); 125 assert(old_lc_ctype != NULL); 126#else 127 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 128#endif 129 for (i = 0; nv[i].pattern != NULL; i++) 130 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 131 rv = nv[i].mime; 132 break; 133 } 134#ifdef CDF_DEBUG 135 fprintf(stderr, "unknown app %s\n", vbuf); 136#endif 137#ifdef USE_C_LOCALE 138 (void)uselocale(old_lc_ctype); 139 freelocale(c_lc_ctype); 140#else 141 setlocale(LC_CTYPE, old_lc_ctype); 142#endif 143 return rv; 144} 145 146private int 147cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 148 size_t count, const cdf_directory_t *root_storage) 149{ 150 size_t i; 151 cdf_timestamp_t tp; 152 struct timespec ts; 153 char buf[64]; 154 const char *str = NULL; 155 const char *s; 156 int len; 157 158 if (!NOTMIME(ms) && root_storage) 159 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 160 clsid2mime); 161 162 for (i = 0; i < count; i++) { 163 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 164 switch (info[i].pi_type) { 165 case CDF_NULL: 166 break; 167 case CDF_SIGNED16: 168 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 169 info[i].pi_s16) == -1) 170 return -1; 171 break; 172 case CDF_SIGNED32: 173 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 174 info[i].pi_s32) == -1) 175 return -1; 176 break; 177 case CDF_UNSIGNED32: 178 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 179 info[i].pi_u32) == -1) 180 return -1; 181 break; 182 case CDF_FLOAT: 183 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 184 info[i].pi_f) == -1) 185 return -1; 186 break; 187 case CDF_DOUBLE: 188 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 189 info[i].pi_d) == -1) 190 return -1; 191 break; 192 case CDF_LENGTH32_STRING: 193 case CDF_LENGTH32_WSTRING: 194 len = info[i].pi_str.s_len; 195 if (len > 1) { 196 char vbuf[1024]; 197 size_t j, k = 1; 198 199 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 200 k++; 201 s = info[i].pi_str.s_buf; 202 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 203 if (*s == '\0') 204 break; 205 if (isprint((unsigned char)*s)) 206 vbuf[j++] = *s; 207 } 208 if (j == sizeof(vbuf)) 209 --j; 210 vbuf[j] = '\0'; 211 if (NOTMIME(ms)) { 212 if (vbuf[0]) { 213 if (file_printf(ms, ", %s: %s", 214 buf, vbuf) == -1) 215 return -1; 216 } 217 } else if (str == NULL && info[i].pi_id == 218 CDF_PROPERTY_NAME_OF_APPLICATION) { 219 str = cdf_app_to_mime(vbuf, app2mime); 220 } 221 } 222 break; 223 case CDF_FILETIME: 224 tp = info[i].pi_tp; 225 if (tp != 0) { 226 char tbuf[64]; 227 if (tp < 1000000000000000LL) { 228 cdf_print_elapsed_time(tbuf, 229 sizeof(tbuf), tp); 230 if (NOTMIME(ms) && file_printf(ms, 231 ", %s: %s", buf, tbuf) == -1) 232 return -1; 233 } else { 234 char *c, *ec; 235 cdf_timestamp_to_timespec(&ts, tp); 236 c = cdf_ctime(&ts.tv_sec, tbuf); 237 if (c != NULL && 238 (ec = strchr(c, '\n')) != NULL) 239 *ec = '\0'; 240 241 if (NOTMIME(ms) && file_printf(ms, 242 ", %s: %s", buf, c) == -1) 243 return -1; 244 } 245 } 246 break; 247 case CDF_CLIPBOARD: 248 break; 249 default: 250 return -1; 251 } 252 } 253 if (!NOTMIME(ms)) { 254 if (str == NULL) 255 return 0; 256 if (file_printf(ms, "application/%s", str) == -1) 257 return -1; 258 } 259 return 1; 260} 261 262private int 263cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 264 const cdf_stream_t *sst) 265{ 266 cdf_catalog_t *cat; 267 size_t i; 268 char buf[256]; 269 cdf_catalog_entry_t *ce; 270 271 if (NOTMIME(ms)) { 272 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 273 return -1; 274 if (cdf_unpack_catalog(h, sst, &cat) == -1) 275 return -1; 276 ce = cat->cat_e; 277 /* skip first entry since it has a , or paren */ 278 for (i = 1; i < cat->cat_num; i++) 279 if (file_printf(ms, "%s%s", 280 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 281 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 282 free(cat); 283 return -1; 284 } 285 free(cat); 286 } else { 287 if (file_printf(ms, "application/CDFV2") == -1) 288 return -1; 289 } 290 return 1; 291} 292 293private int 294cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 295 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 296{ 297 cdf_summary_info_header_t si; 298 cdf_property_info_t *info; 299 size_t count; 300 int m; 301 302 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 303 return -1; 304 305 if (NOTMIME(ms)) { 306 const char *str; 307 308 if (file_printf(ms, "Composite Document File V2 Document") 309 == -1) 310 return -1; 311 312 if (file_printf(ms, ", %s Endian", 313 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 314 return -2; 315 switch (si.si_os) { 316 case 2: 317 if (file_printf(ms, ", Os: Windows, Version %d.%d", 318 si.si_os_version & 0xff, 319 (uint32_t)si.si_os_version >> 8) == -1) 320 return -2; 321 break; 322 case 1: 323 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 324 (uint32_t)si.si_os_version >> 8, 325 si.si_os_version & 0xff) == -1) 326 return -2; 327 break; 328 default: 329 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 330 si.si_os_version & 0xff, 331 (uint32_t)si.si_os_version >> 8) == -1) 332 return -2; 333 break; 334 } 335 if (root_storage) { 336 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 337 clsid2desc); 338 if (str) { 339 if (file_printf(ms, ", %s", str) == -1) 340 return -2; 341 } 342 } 343 } 344 345 m = cdf_file_property_info(ms, info, count, root_storage); 346 free(info); 347 348 return m == -1 ? -2 : m; 349} 350 351#ifdef notdef 352private char * 353format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 354 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 355 PRIx64 "-%.12" PRIx64, 356 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 357 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 358 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 359 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 360 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 361 return buf; 362} 363#endif 364 365private int 366cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 367 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 368 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 369{ 370 int i; 371 372 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 373 dir, "Catalog", scn)) == -1) 374 return i; 375#ifdef CDF_DEBUG 376 cdf_dump_catalog(&h, scn); 377#endif 378 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 379 return -1; 380 return i; 381} 382 383private struct sinfo { 384 const char *name; 385 const char *mime; 386 const char *sections[5]; 387 const int types[5]; 388} sectioninfo[] = { 389 { "Encrypted", "encrypted", 390 { 391 "EncryptedPackage", NULL, NULL, NULL, NULL, 392 }, 393 { 394 CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, 395 396 }, 397 }, 398 { "QuickBooks", "quickbooks", 399 { 400#if 0 401 "TaxForms", "PDFTaxForms", "modulesInBackup", 402#endif 403 "mfbu_header", NULL, NULL, NULL, NULL, 404 }, 405 { 406#if 0 407 CDF_DIR_TYPE_USER_STORAGE, 408 CDF_DIR_TYPE_USER_STORAGE, 409 CDF_DIR_TYPE_USER_STREAM, 410#endif 411 CDF_DIR_TYPE_USER_STREAM, 412 0, 0, 0, 0 413 }, 414 }, 415}; 416 417private int 418cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 419{ 420 size_t sd, j; 421 422 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 423 const struct sinfo *si = §ioninfo[sd]; 424 for (j = 0; si->sections[j]; j++) { 425 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 426 <= 0) { 427#ifdef CDF_DEBUG 428 fprintf(stderr, "Can't read %s\n", 429 si->sections[j]); 430#endif 431 break; 432 } 433 } 434 if (si->sections[j] != NULL) 435 continue; 436 if (NOTMIME(ms)) { 437 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 438 return -1; 439 } else { 440 if (file_printf(ms, "application/CDFV2-%s", 441 si->mime) == -1) 442 return -1; 443 } 444 return 1; 445 } 446 return -1; 447} 448 449protected int 450file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 451 size_t nbytes) 452{ 453 cdf_info_t info; 454 cdf_header_t h; 455 cdf_sat_t sat, ssat; 456 cdf_stream_t sst, scn; 457 cdf_dir_t dir; 458 int i; 459 const char *expn = ""; 460 const cdf_directory_t *root_storage; 461 462 info.i_fd = fd; 463 info.i_buf = buf; 464 info.i_len = nbytes; 465 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 466 return 0; 467 if (cdf_read_header(&info, &h) == -1) 468 return 0; 469#ifdef CDF_DEBUG 470 cdf_dump_header(&h); 471#endif 472 473 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 474 expn = "Can't read SAT"; 475 goto out0; 476 } 477#ifdef CDF_DEBUG 478 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 479#endif 480 481 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 482 expn = "Can't read SSAT"; 483 goto out1; 484 } 485#ifdef CDF_DEBUG 486 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 487#endif 488 489 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 490 expn = "Can't read directory"; 491 goto out2; 492 } 493 494 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 495 &root_storage)) == -1) { 496 expn = "Cannot read short stream"; 497 goto out3; 498 } 499#ifdef CDF_DEBUG 500 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 501#endif 502#ifdef notdef 503 if (root_storage) { 504 if (NOTMIME(ms)) { 505 char clsbuf[128]; 506 if (file_printf(ms, "CLSID %s, ", 507 format_clsid(clsbuf, sizeof(clsbuf), 508 root_storage->d_storage_uuid)) == -1) 509 return -1; 510 } 511 } 512#endif 513 514 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 515 "FileHeader", &scn)) != -1) { 516#define HWP5_SIGNATURE "HWP Document File" 517 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 518 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 519 sizeof(HWP5_SIGNATURE) - 1) == 0) { 520 if (NOTMIME(ms)) { 521 if (file_printf(ms, 522 "Hangul (Korean) Word Processor File 5.x") == -1) 523 return -1; 524 } else { 525 if (file_printf(ms, "application/x-hwp") == -1) 526 return -1; 527 } 528 i = 1; 529 goto out5; 530 } else { 531 free(scn.sst_tab); 532 scn.sst_tab = NULL; 533 scn.sst_len = 0; 534 scn.sst_dirlen = 0; 535 } 536 } 537 538 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 539 &scn)) == -1) { 540 if (errno != ESRCH) { 541 expn = "Cannot read summary info"; 542 goto out4; 543 } 544 i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst, 545 &dir, &scn); 546 if (i > 0) 547 goto out4; 548 i = cdf_file_dir_info(ms, &dir); 549 if (i < 0) 550 expn = "Cannot read section info"; 551 goto out4; 552 } 553 554 555#ifdef CDF_DEBUG 556 cdf_dump_summary_info(&h, &scn); 557#endif 558 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) 559 expn = "Can't expand summary_info"; 560 561 if (i == 0) { 562 const char *str = NULL; 563 cdf_directory_t *d; 564 char name[__arraycount(d->d_name)]; 565 size_t j, k; 566 567 for (j = 0; str == NULL && j < dir.dir_len; j++) { 568 d = &dir.dir_tab[j]; 569 for (k = 0; k < sizeof(name); k++) 570 name[k] = (char)cdf_tole2(d->d_name[k]); 571 str = cdf_app_to_mime(name, 572 NOTMIME(ms) ? name2desc : name2mime); 573 } 574 if (NOTMIME(ms)) { 575 if (str != NULL) { 576 if (file_printf(ms, "%s", str) == -1) 577 return -1; 578 i = 1; 579 } 580 } else { 581 if (str == NULL) 582 str = "vnd.ms-office"; 583 if (file_printf(ms, "application/%s", str) == -1) 584 return -1; 585 i = 1; 586 } 587 } 588out5: 589 free(scn.sst_tab); 590out4: 591 free(sst.sst_tab); 592out3: 593 free(dir.dir_tab); 594out2: 595 free(ssat.sat_tab); 596out1: 597 free(sat.sat_tab); 598out0: 599 if (i == -1) { 600 if (NOTMIME(ms)) { 601 if (file_printf(ms, 602 "Composite Document File V2 Document") == -1) 603 return -1; 604 if (*expn) 605 if (file_printf(ms, ", %s", expn) == -1) 606 return -1; 607 } else { 608 if (file_printf(ms, "application/CDFV2-unknown") == -1) 609 return -1; 610 } 611 i = 1; 612 } 613 return i; 614} 615