readcdf.c revision 328875
1/*-
2 * Copyright (c) 2008, 2016 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "file.h"
27
28#ifndef lint
29FILE_RCSID("@(#)$File: readcdf.c,v 1.65 2017/04/08 20:58:03 christos Exp $")
30#endif
31
32#include <assert.h>
33#include <stdlib.h>
34#include <unistd.h>
35#include <string.h>
36#include <time.h>
37#include <ctype.h>
38
39#include "cdf.h"
40#include "magic.h"
41
/*
 * Element count of a true array (not a pointer).  Only defined here when
 * nothing earlier has already defined it.  The argument is parenthesized
 * so that an expression argument is indexed as a whole.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when none of the MAGIC_MIME bits are set in ms->flags. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47
/*
 * Pattern/result pair shared by the lookup tables below.  "pattern" is
 * the text to look for and "mime" is the string handed back on a match
 * (for name2desc that string is a description, not a MIME subtype).
 * Each table is terminated by a { NULL, NULL } entry.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* Application names -> MIME subtype. */
static const struct nv app2mime[] = {
	{ "Word",                     "msword"            },
	{ "Excel",                    "vnd.ms-excel"      },
	{ "Powerpoint",               "vnd.ms-powerpoint" },
	{ "Crystal Reports",          "x-rpt"             },
	{ "Advanced Installer",       "vnd.ms-msi"        },
	{ "InstallShield",            "vnd.ms-msi"        },
	{ "Microsoft Patch Compiler", "vnd.ms-msi"        },
	{ "NAnt",                     "vnd.ms-msi"        },
	{ "Windows Installer",        "vnd.ms-msi"        },
	{ NULL,                       NULL                }
};

/* Directory entry names -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "Book",             "vnd.ms-excel"      },
	{ "Workbook",         "vnd.ms-excel"      },
	{ "WordDocument",     "msword"            },
	{ "PowerPoint",       "vnd.ms-powerpoint" },
	{ "DigitalSignature", "vnd.ms-msi"        },
	{ NULL,               NULL                }
};

/* Directory entry names -> human readable description. */
static const struct nv name2desc[] = {
	{ "Book",             "Microsoft Excel"      },
	{ "Workbook",         "Microsoft Excel"      },
	{ "WordDocument",     "Microsoft Word"       },
	{ "PowerPoint",       "Microsoft PowerPoint" },
	{ "DigitalSignature", "Microsoft Installer"  },
	{ NULL,               NULL                   }
};
77
/*
 * Class-id lookup entry: a 128-bit id stored as two 64-bit halves and
 * the string reported for it.  Tables end with an entry whose "mime"
 * pointer is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class id -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL }
};

/* Class id -> human readable description. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL }
};
98
99private const char *
100cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
101{
102	size_t i;
103	for (i = 0; cv[i].mime != NULL; i++) {
104		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
105			return cv[i].mime;
106	}
107#ifdef CDF_DEBUG
108	fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
109	    clsid[1]);
110#endif
111	return NULL;
112}
113
114private const char *
115cdf_app_to_mime(const char *vbuf, const struct nv *nv)
116{
117	size_t i;
118	const char *rv = NULL;
119#ifdef USE_C_LOCALE
120	locale_t old_lc_ctype, c_lc_ctype;
121
122	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
123	assert(c_lc_ctype != NULL);
124	old_lc_ctype = uselocale(c_lc_ctype);
125	assert(old_lc_ctype != NULL);
126#else
127	char *old_lc_ctype = setlocale(LC_CTYPE, "C");
128#endif
129	for (i = 0; nv[i].pattern != NULL; i++)
130		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
131			rv = nv[i].mime;
132			break;
133		}
134#ifdef CDF_DEBUG
135	fprintf(stderr, "unknown app %s\n", vbuf);
136#endif
137#ifdef USE_C_LOCALE
138	(void)uselocale(old_lc_ctype);
139	freelocale(c_lc_ctype);
140#else
141	setlocale(LC_CTYPE, old_lc_ctype);
142#endif
143	return rv;
144}
145
146private int
147cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
148    size_t count, const cdf_directory_t *root_storage)
149{
150        size_t i;
151        cdf_timestamp_t tp;
152        struct timespec ts;
153        char buf[64];
154        const char *str = NULL;
155        const char *s, *e;
156        int len;
157
158        if (!NOTMIME(ms) && root_storage)
159		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
160		    clsid2mime);
161
162        for (i = 0; i < count; i++) {
163                cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
164                switch (info[i].pi_type) {
165                case CDF_NULL:
166                        break;
167                case CDF_SIGNED16:
168                        if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
169                            info[i].pi_s16) == -1)
170                                return -1;
171                        break;
172                case CDF_SIGNED32:
173                        if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
174                            info[i].pi_s32) == -1)
175                                return -1;
176                        break;
177                case CDF_UNSIGNED32:
178                        if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
179                            info[i].pi_u32) == -1)
180                                return -1;
181                        break;
182                case CDF_FLOAT:
183                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
184                            info[i].pi_f) == -1)
185                                return -1;
186                        break;
187                case CDF_DOUBLE:
188                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
189                            info[i].pi_d) == -1)
190                                return -1;
191                        break;
192                case CDF_LENGTH32_STRING:
193                case CDF_LENGTH32_WSTRING:
194                        len = info[i].pi_str.s_len;
195                        if (len > 1) {
196                                char vbuf[1024];
197                                size_t j, k = 1;
198
199                                if (info[i].pi_type == CDF_LENGTH32_WSTRING)
200                                    k++;
201                                s = info[i].pi_str.s_buf;
202				e = info[i].pi_str.s_buf + len;
203                                for (j = 0; s < e && j < sizeof(vbuf)
204				    && len--; s += k) {
205                                        if (*s == '\0')
206                                                break;
207                                        if (isprint((unsigned char)*s))
208                                                vbuf[j++] = *s;
209                                }
210                                if (j == sizeof(vbuf))
211                                        --j;
212                                vbuf[j] = '\0';
213                                if (NOTMIME(ms)) {
214                                        if (vbuf[0]) {
215                                                if (file_printf(ms, ", %s: %s",
216                                                    buf, vbuf) == -1)
217                                                        return -1;
218                                        }
219                                } else if (str == NULL && info[i].pi_id ==
220				    CDF_PROPERTY_NAME_OF_APPLICATION) {
221					str = cdf_app_to_mime(vbuf, app2mime);
222				}
223			}
224                        break;
225                case CDF_FILETIME:
226                        tp = info[i].pi_tp;
227                        if (tp != 0) {
228				char tbuf[64];
229                                if (tp < 1000000000000000LL) {
230                                        cdf_print_elapsed_time(tbuf,
231                                            sizeof(tbuf), tp);
232                                        if (NOTMIME(ms) && file_printf(ms,
233                                            ", %s: %s", buf, tbuf) == -1)
234                                                return -1;
235                                } else {
236                                        char *c, *ec;
237                                        cdf_timestamp_to_timespec(&ts, tp);
238                                        c = cdf_ctime(&ts.tv_sec, tbuf);
239                                        if (c != NULL &&
240					    (ec = strchr(c, '\n')) != NULL)
241						*ec = '\0';
242
243                                        if (NOTMIME(ms) && file_printf(ms,
244                                            ", %s: %s", buf, c) == -1)
245                                                return -1;
246                                }
247                        }
248                        break;
249                case CDF_CLIPBOARD:
250                        break;
251                default:
252                        return -1;
253                }
254        }
255        if (!NOTMIME(ms)) {
256		if (str == NULL)
257			return 0;
258                if (file_printf(ms, "application/%s", str) == -1)
259                        return -1;
260        }
261        return 1;
262}
263
264private int
265cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
266    const cdf_stream_t *sst)
267{
268	cdf_catalog_t *cat;
269	size_t i;
270	char buf[256];
271	cdf_catalog_entry_t *ce;
272
273        if (NOTMIME(ms)) {
274		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
275			return -1;
276		if (cdf_unpack_catalog(h, sst, &cat) == -1)
277			return -1;
278		ce = cat->cat_e;
279		/* skip first entry since it has a , or paren */
280		for (i = 1; i < cat->cat_num; i++)
281			if (file_printf(ms, "%s%s",
282			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
283			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
284				free(cat);
285				return -1;
286			}
287		free(cat);
288	} else {
289		if (file_printf(ms, "application/CDFV2") == -1)
290			return -1;
291	}
292	return 1;
293}
294
295private int
296cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
297    const cdf_stream_t *sst, const cdf_directory_t *root_storage)
298{
299        cdf_summary_info_header_t si;
300        cdf_property_info_t *info;
301        size_t count;
302        int m;
303
304        if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
305                return -1;
306
307        if (NOTMIME(ms)) {
308		const char *str;
309
310                if (file_printf(ms, "Composite Document File V2 Document")
311		    == -1)
312                        return -1;
313
314                if (file_printf(ms, ", %s Endian",
315                    si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
316                        return -2;
317                switch (si.si_os) {
318                case 2:
319                        if (file_printf(ms, ", Os: Windows, Version %d.%d",
320                            si.si_os_version & 0xff,
321                            (uint32_t)si.si_os_version >> 8) == -1)
322                                return -2;
323                        break;
324                case 1:
325                        if (file_printf(ms, ", Os: MacOS, Version %d.%d",
326                            (uint32_t)si.si_os_version >> 8,
327                            si.si_os_version & 0xff) == -1)
328                                return -2;
329                        break;
330                default:
331                        if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
332                            si.si_os_version & 0xff,
333                            (uint32_t)si.si_os_version >> 8) == -1)
334                                return -2;
335                        break;
336                }
337		if (root_storage) {
338			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
339			    clsid2desc);
340			if (str) {
341				if (file_printf(ms, ", %s", str) == -1)
342					return -2;
343			}
344		}
345	}
346
347        m = cdf_file_property_info(ms, info, count, root_storage);
348        free(info);
349
350        return m == -1 ? -2 : m;
351}
352
#ifdef notdef
/*
 * Format a 128-bit id, given as two 64-bit halves, into buf as five
 * hyphen-separated hexadecimal groups of 8, 4, 4, 4 and 12 digits.
 * Returns buf.
 *
 * The last group is masked to 48 bits so that it cannot repeat bits that
 * were already printed in the fourth group or exceed 12 digits.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
366
367private int
368cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
369    const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
370    const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
371{
372	int i;
373
374	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
375	    dir, "Catalog", scn)) == -1)
376		return i;
377#ifdef CDF_DEBUG
378	cdf_dump_catalog(h, scn);
379#endif
380	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
381		return -1;
382	return i;
383}
384
385private int
386cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info,
387    const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
388    const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn,
389    const cdf_directory_t *root_storage, const char **expn)
390{
391	int i;
392	const char *str = NULL;
393	cdf_directory_t *d;
394	char name[__arraycount(d->d_name)];
395	size_t j, k;
396
397#ifdef CDF_DEBUG
398        cdf_dump_summary_info(h, scn);
399#endif
400        if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) {
401            *expn = "Can't expand summary_info";
402	    return i;
403	}
404	if (i == 1)
405		return i;
406	for (j = 0; str == NULL && j < dir->dir_len; j++) {
407		d = &dir->dir_tab[j];
408		for (k = 0; k < sizeof(name); k++)
409			name[k] = (char)cdf_tole2(d->d_name[k]);
410		str = cdf_app_to_mime(name,
411				      NOTMIME(ms) ? name2desc : name2mime);
412	}
413	if (NOTMIME(ms)) {
414		if (str != NULL) {
415			if (file_printf(ms, "%s", str) == -1)
416				return -1;
417			i = 1;
418		}
419	} else {
420		if (str == NULL)
421			str = "vnd.ms-office";
422		if (file_printf(ms, "application/%s", str) == -1)
423			return -1;
424		i = 1;
425	}
426	if (i <= 0) {
427		i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst,
428					  dir, scn);
429	}
430	return i;
431}
432
433private struct sinfo {
434	const char *name;
435	const char *mime;
436	const char *sections[5];
437	const int  types[5];
438} sectioninfo[] = {
439	{ "Encrypted", "encrypted",
440		{
441			"EncryptedPackage", "EncryptedSummary",
442			NULL, NULL, NULL,
443		},
444		{
445			CDF_DIR_TYPE_USER_STREAM,
446			CDF_DIR_TYPE_USER_STREAM,
447			0, 0, 0,
448
449		},
450	},
451	{ "QuickBooks", "quickbooks",
452		{
453#if 0
454			"TaxForms", "PDFTaxForms", "modulesInBackup",
455#endif
456			"mfbu_header", NULL, NULL, NULL, NULL,
457		},
458		{
459#if 0
460			CDF_DIR_TYPE_USER_STORAGE,
461			CDF_DIR_TYPE_USER_STORAGE,
462			CDF_DIR_TYPE_USER_STREAM,
463#endif
464			CDF_DIR_TYPE_USER_STREAM,
465			0, 0, 0, 0
466		},
467	},
468	{ "Microsoft Excel", "vnd.ms-excel",
469		{
470			"Book", "Workbook", NULL, NULL, NULL,
471		},
472		{
473			CDF_DIR_TYPE_USER_STREAM,
474			CDF_DIR_TYPE_USER_STREAM,
475			0, 0, 0,
476		},
477	},
478	{ "Microsoft Word", "msword",
479		{
480			"WordDocument", NULL, NULL, NULL, NULL,
481		},
482		{
483			CDF_DIR_TYPE_USER_STREAM,
484			0, 0, 0, 0,
485		},
486	},
487	{ "Microsoft PowerPoint", "vnd.ms-powerpoint",
488		{
489			"PowerPoint", NULL, NULL, NULL, NULL,
490		},
491		{
492			CDF_DIR_TYPE_USER_STREAM,
493			0, 0, 0, 0,
494		},
495	},
496	{ "Microsoft Outlook Message", "vnd.ms-outlook",
497		{
498			"__properties_version1.0",
499			"__recip_version1.0_#00000000",
500			NULL, NULL, NULL,
501		},
502		{
503			CDF_DIR_TYPE_USER_STREAM,
504			CDF_DIR_TYPE_USER_STORAGE,
505			0, 0, 0,
506		},
507	},
508};
509
510private int
511cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
512{
513	size_t sd, j;
514
515	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
516		const struct sinfo *si = &sectioninfo[sd];
517		for (j = 0; si->sections[j]; j++) {
518			if (cdf_find_stream(dir, si->sections[j], si->types[j])
519			    > 0)
520				break;
521#ifdef CDF_DEBUG
522			fprintf(stderr, "Can't read %s\n", si->sections[j]);
523#endif
524		}
525		if (si->sections[j] == NULL)
526			continue;
527		if (NOTMIME(ms)) {
528			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
529				return -1;
530		} else {
531			if (file_printf(ms, "application/%s", si->mime) == -1)
532				return -1;
533		}
534		return 1;
535	}
536	return -1;
537}
538
539protected int
540file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
541    size_t nbytes)
542{
543        cdf_info_t info;
544        cdf_header_t h;
545        cdf_sat_t sat, ssat;
546        cdf_stream_t sst, scn;
547        cdf_dir_t dir;
548        int i;
549        const char *expn = "";
550        const cdf_directory_t *root_storage;
551
552        scn.sst_tab = NULL;
553        info.i_fd = fd;
554        info.i_buf = buf;
555        info.i_len = nbytes;
556        if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
557                return 0;
558        if (cdf_read_header(&info, &h) == -1)
559                return 0;
560#ifdef CDF_DEBUG
561        cdf_dump_header(&h);
562#endif
563
564        if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
565                expn = "Can't read SAT";
566                goto out0;
567        }
568#ifdef CDF_DEBUG
569        cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
570#endif
571
572        if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
573                expn = "Can't read SSAT";
574                goto out1;
575        }
576#ifdef CDF_DEBUG
577        cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
578#endif
579
580        if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
581                expn = "Can't read directory";
582                goto out2;
583        }
584
585        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
586	    &root_storage)) == -1) {
587                expn = "Cannot read short stream";
588                goto out3;
589        }
590#ifdef CDF_DEBUG
591        cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
592#endif
593#ifdef notdef
594	if (root_storage) {
595		if (NOTMIME(ms)) {
596			char clsbuf[128];
597			if (file_printf(ms, "CLSID %s, ",
598			    format_clsid(clsbuf, sizeof(clsbuf),
599			    root_storage->d_storage_uuid)) == -1)
600				return -1;
601		}
602	}
603#endif
604
605	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
606	    "FileHeader", &scn)) != -1) {
607#define HWP5_SIGNATURE "HWP Document File"
608		if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1
609		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
610		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
611		    if (NOTMIME(ms)) {
612			if (file_printf(ms,
613			    "Hangul (Korean) Word Processor File 5.x") == -1)
614			    return -1;
615		    } else {
616			if (file_printf(ms, "application/x-hwp") == -1)
617			    return -1;
618		    }
619		    i = 1;
620		    goto out5;
621		} else {
622		    cdf_zero_stream(&scn);
623		}
624	}
625
626        if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
627            &scn)) == -1) {
628                if (errno != ESRCH) {
629                        expn = "Cannot read summary info";
630		}
631	} else {
632		i = cdf_check_summary_info(ms, &info, &h,
633		    &sat, &ssat, &sst, &dir, &scn, root_storage, &expn);
634		cdf_zero_stream(&scn);
635	}
636	if (i <= 0) {
637		if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat,
638		    &sst, &dir, &scn)) == -1) {
639			if (errno != ESRCH) {
640				expn = "Cannot read summary info";
641			}
642		} else {
643			i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat,
644			    &sst, &dir, &scn, root_storage, &expn);
645		}
646	}
647	if (i <= 0) {
648		i = cdf_file_dir_info(ms, &dir);
649		if (i < 0)
650			expn = "Cannot read section info";
651	}
652out5:
653	cdf_zero_stream(&scn);
654	cdf_zero_stream(&sst);
655out3:
656        free(dir.dir_tab);
657out2:
658        free(ssat.sat_tab);
659out1:
660        free(sat.sat_tab);
661out0:
662	if (i == -1) {
663	    if (NOTMIME(ms)) {
664		if (file_printf(ms,
665		    "Composite Document File V2 Document") == -1)
666		    return -1;
667		if (*expn)
668		    if (file_printf(ms, ", %s", expn) == -1)
669			return -1;
670	    } else {
671		if (file_printf(ms, "application/CDFV2") == -1)
672		    return -1;
673	    }
674	    i = 1;
675	}
676        return i;
677}
678