readcdf.c revision 284778
1/*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "file.h"
27
28#ifndef lint
29FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $")
30#endif
31
32#include <assert.h>
33#include <stdlib.h>
34#include <unistd.h>
35#include <string.h>
36#include <time.h>
37#include <ctype.h>
38
39#include "cdf.h"
40#include "magic.h"
41
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that expressions such as __arraycount(*p) or
 * __arraycount(s->tab) expand correctly.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when no MAGIC_MIME flag bit is set, i.e. textual output is wanted. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47
/*
 * Name/value lookup tables.  Each table is terminated by an entry whose
 * pattern is NULL.  The pattern is searched for as a substring by
 * cdf_app_to_mime(); the second member is the string handed back on a
 * match.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* "Name of Application" property text -> MIME subtype. */
static const struct nv app2mime[] = {
	{ "Word",			"msword" },
	{ "Excel",			"vnd.ms-excel" },
	{ "Powerpoint",			"vnd.ms-powerpoint" },
	{ "Crystal Reports",		"x-rpt" },
	{ "Advanced Installer",		"vnd.ms-msi" },
	{ "InstallShield",		"vnd.ms-msi" },
	{ "Microsoft Patch Compiler",	"vnd.ms-msi" },
	{ "NAnt",			"vnd.ms-msi" },
	{ "Windows Installer",		"vnd.ms-msi" },
	{ NULL,				NULL }
};

/* Directory entry name -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "WordDocument",		"msword" },
	{ "PowerPoint",			"vnd.ms-powerpoint" },
	{ "DigitalSignature",		"vnd.ms-msi" },
	{ NULL,				NULL }
};

/* Directory entry name -> human readable description. */
static const struct nv name2desc[] = {
	{ "WordDocument",		"Microsoft Office Word" },
	{ "PowerPoint",			"Microsoft PowerPoint" },
	{ "DigitalSignature",		"Microsoft Installer" },
	{ NULL,				NULL }
};
73
/*
 * Class-id lookup tables.  A class id is held as two 64-bit words; each
 * table ends with an entry whose string member is NULL.  Both tables are
 * searched with cdf_clsid_to_mime().
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class id -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL }
};

/* Class id -> human readable description. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL }
};
94
95private const char *
96cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
97{
98	size_t i;
99	for (i = 0; cv[i].mime != NULL; i++) {
100		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
101			return cv[i].mime;
102	}
103#ifdef CDF_DEBUG
104	fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
105	    clsid[1]);
106#endif
107	return NULL;
108}
109
110private const char *
111cdf_app_to_mime(const char *vbuf, const struct nv *nv)
112{
113	size_t i;
114	const char *rv = NULL;
115#ifdef USE_C_LOCALE
116	locale_t old_lc_ctype, c_lc_ctype;
117
118	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
119	assert(c_lc_ctype != NULL);
120	old_lc_ctype = uselocale(c_lc_ctype);
121	assert(old_lc_ctype != NULL);
122#endif
123	for (i = 0; nv[i].pattern != NULL; i++)
124		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
125			rv = nv[i].mime;
126			break;
127		}
128#ifdef CDF_DEBUG
129	fprintf(stderr, "unknown app %s\n", vbuf);
130#endif
131#ifdef USE_C_LOCALE
132	(void)uselocale(old_lc_ctype);
133	freelocale(c_lc_ctype);
134#endif
135	return rv;
136}
137
138private int
139cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
140    size_t count, const cdf_directory_t *root_storage)
141{
142        size_t i;
143        cdf_timestamp_t tp;
144        struct timespec ts;
145        char buf[64];
146        const char *str = NULL;
147        const char *s;
148        int len;
149
150        if (!NOTMIME(ms) && root_storage)
151		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
152		    clsid2mime);
153
154        for (i = 0; i < count; i++) {
155                cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
156                switch (info[i].pi_type) {
157                case CDF_NULL:
158                        break;
159                case CDF_SIGNED16:
160                        if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
161                            info[i].pi_s16) == -1)
162                                return -1;
163                        break;
164                case CDF_SIGNED32:
165                        if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
166                            info[i].pi_s32) == -1)
167                                return -1;
168                        break;
169                case CDF_UNSIGNED32:
170                        if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
171                            info[i].pi_u32) == -1)
172                                return -1;
173                        break;
174                case CDF_FLOAT:
175                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
176                            info[i].pi_f) == -1)
177                                return -1;
178                        break;
179                case CDF_DOUBLE:
180                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
181                            info[i].pi_d) == -1)
182                                return -1;
183                        break;
184                case CDF_LENGTH32_STRING:
185                case CDF_LENGTH32_WSTRING:
186                        len = info[i].pi_str.s_len;
187                        if (len > 1) {
188                                char vbuf[1024];
189                                size_t j, k = 1;
190
191                                if (info[i].pi_type == CDF_LENGTH32_WSTRING)
192                                    k++;
193                                s = info[i].pi_str.s_buf;
194                                for (j = 0; j < sizeof(vbuf) && len--; s += k) {
195                                        if (*s == '\0')
196                                                break;
197                                        if (isprint((unsigned char)*s))
198                                                vbuf[j++] = *s;
199                                }
200                                if (j == sizeof(vbuf))
201                                        --j;
202                                vbuf[j] = '\0';
203                                if (NOTMIME(ms)) {
204                                        if (vbuf[0]) {
205                                                if (file_printf(ms, ", %s: %s",
206                                                    buf, vbuf) == -1)
207                                                        return -1;
208                                        }
209                                } else if (str == NULL && info[i].pi_id ==
210				    CDF_PROPERTY_NAME_OF_APPLICATION) {
211					str = cdf_app_to_mime(vbuf, app2mime);
212				}
213			}
214                        break;
215                case CDF_FILETIME:
216                        tp = info[i].pi_tp;
217                        if (tp != 0) {
218				char tbuf[64];
219                                if (tp < 1000000000000000LL) {
220                                        cdf_print_elapsed_time(tbuf,
221                                            sizeof(tbuf), tp);
222                                        if (NOTMIME(ms) && file_printf(ms,
223                                            ", %s: %s", buf, tbuf) == -1)
224                                                return -1;
225                                } else {
226                                        char *c, *ec;
227                                        cdf_timestamp_to_timespec(&ts, tp);
228                                        c = cdf_ctime(&ts.tv_sec, tbuf);
229                                        if (c != NULL &&
230					    (ec = strchr(c, '\n')) != NULL)
231						*ec = '\0';
232
233                                        if (NOTMIME(ms) && file_printf(ms,
234                                            ", %s: %s", buf, c) == -1)
235                                                return -1;
236                                }
237                        }
238                        break;
239                case CDF_CLIPBOARD:
240                        break;
241                default:
242                        return -1;
243                }
244        }
245        if (!NOTMIME(ms)) {
246		if (str == NULL)
247			return 0;
248                if (file_printf(ms, "application/%s", str) == -1)
249                        return -1;
250        }
251        return 1;
252}
253
254private int
255cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
256    const cdf_stream_t *sst)
257{
258	cdf_catalog_t *cat;
259	size_t i;
260	char buf[256];
261	cdf_catalog_entry_t *ce;
262
263        if (NOTMIME(ms)) {
264		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
265			return -1;
266		if (cdf_unpack_catalog(h, sst, &cat) == -1)
267			return -1;
268		ce = cat->cat_e;
269		/* skip first entry since it has a , or paren */
270		for (i = 1; i < cat->cat_num; i++)
271			if (file_printf(ms, "%s%s",
272			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
273			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
274				free(cat);
275				return -1;
276			}
277		free(cat);
278	} else {
279		if (file_printf(ms, "application/CDFV2") == -1)
280			return -1;
281	}
282	return 1;
283}
284
285private int
286cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
287    const cdf_stream_t *sst, const cdf_directory_t *root_storage)
288{
289        cdf_summary_info_header_t si;
290        cdf_property_info_t *info;
291        size_t count;
292        int m;
293
294        if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
295                return -1;
296
297        if (NOTMIME(ms)) {
298		const char *str;
299
300                if (file_printf(ms, "Composite Document File V2 Document")
301		    == -1)
302                        return -1;
303
304                if (file_printf(ms, ", %s Endian",
305                    si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
306                        return -2;
307                switch (si.si_os) {
308                case 2:
309                        if (file_printf(ms, ", Os: Windows, Version %d.%d",
310                            si.si_os_version & 0xff,
311                            (uint32_t)si.si_os_version >> 8) == -1)
312                                return -2;
313                        break;
314                case 1:
315                        if (file_printf(ms, ", Os: MacOS, Version %d.%d",
316                            (uint32_t)si.si_os_version >> 8,
317                            si.si_os_version & 0xff) == -1)
318                                return -2;
319                        break;
320                default:
321                        if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
322                            si.si_os_version & 0xff,
323                            (uint32_t)si.si_os_version >> 8) == -1)
324                                return -2;
325                        break;
326                }
327		if (root_storage) {
328			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
329			    clsid2desc);
330			if (str) {
331				if (file_printf(ms, ", %s", str) == -1)
332					return -2;
333			}
334		}
335	}
336
337        m = cdf_file_property_info(ms, info, count, root_storage);
338        free(info);
339
340        return m == -1 ? -2 : m;
341}
342
#ifdef notdef
/*
 * Format a two-word class id into buf (at most len bytes, always
 * terminated by snprintf) as five dash-separated hexadecimal groups of
 * 8, 4, 4, 4 and 12 digits.  The first three groups come from uuid[0],
 * the last two from the top 16 and the low 48 bits of uuid[1].
 *
 * Returns buf.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    /* 48 bits == 12 hex digits; a wider mask leaks into the group. */
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
356
357private int
358cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
359    const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
360    const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
361{
362	int i;
363
364	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
365	    dir, "Catalog", scn)) == -1)
366		return i;
367#ifdef CDF_DEBUG
368	cdf_dump_catalog(&h, &scn);
369#endif
370	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
371		return -1;
372	return i;
373}
374
375private struct sinfo {
376	const char *name;
377	const char *mime;
378	const char *sections[5];
379	const int  types[5];
380} sectioninfo[] = {
381	{ "Encrypted", "encrypted",
382		{
383			"EncryptedPackage", NULL, NULL, NULL, NULL,
384		},
385		{
386			CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
387
388		},
389	},
390	{ "QuickBooks", "quickbooks",
391		{
392#if 0
393			"TaxForms", "PDFTaxForms", "modulesInBackup",
394#endif
395			"mfbu_header", NULL, NULL, NULL, NULL,
396		},
397		{
398#if 0
399			CDF_DIR_TYPE_USER_STORAGE,
400			CDF_DIR_TYPE_USER_STORAGE,
401			CDF_DIR_TYPE_USER_STREAM,
402#endif
403			CDF_DIR_TYPE_USER_STREAM,
404			0, 0, 0, 0
405		},
406	},
407};
408
409private int
410cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
411{
412	size_t sd, j;
413
414	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
415		const struct sinfo *si = &sectioninfo[sd];
416		for (j = 0; si->sections[j]; j++) {
417			if (cdf_find_stream(dir, si->sections[j], si->types[j])
418			    <= 0) {
419#ifdef CDF_DEBUG
420				fprintf(stderr, "Can't read %s\n",
421				    si->sections[j]);
422#endif
423				break;
424			}
425		}
426		if (si->sections[j] != NULL)
427			continue;
428		if (NOTMIME(ms)) {
429			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
430				return -1;
431		} else {
432			if (file_printf(ms, "application/CDFV2-%s",
433			    si->mime) == -1)
434				return -1;
435		}
436		return 1;
437	}
438	return -1;
439}
440
441protected int
442file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
443    size_t nbytes)
444{
445        cdf_info_t info;
446        cdf_header_t h;
447        cdf_sat_t sat, ssat;
448        cdf_stream_t sst, scn;
449        cdf_dir_t dir;
450        int i;
451        const char *expn = "";
452        const cdf_directory_t *root_storage;
453
454        info.i_fd = fd;
455        info.i_buf = buf;
456        info.i_len = nbytes;
457        if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
458                return 0;
459        if (cdf_read_header(&info, &h) == -1)
460                return 0;
461#ifdef CDF_DEBUG
462        cdf_dump_header(&h);
463#endif
464
465        if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
466                expn = "Can't read SAT";
467                goto out0;
468        }
469#ifdef CDF_DEBUG
470        cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
471#endif
472
473        if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
474                expn = "Can't read SSAT";
475                goto out1;
476        }
477#ifdef CDF_DEBUG
478        cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
479#endif
480
481        if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
482                expn = "Can't read directory";
483                goto out2;
484        }
485
486        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
487	    &root_storage)) == -1) {
488                expn = "Cannot read short stream";
489                goto out3;
490        }
491#ifdef CDF_DEBUG
492        cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
493#endif
494#ifdef notdef
495	if (root_storage) {
496		if (NOTMIME(ms)) {
497			char clsbuf[128];
498			if (file_printf(ms, "CLSID %s, ",
499			    format_clsid(clsbuf, sizeof(clsbuf),
500			    root_storage->d_storage_uuid)) == -1)
501				return -1;
502		}
503	}
504#endif
505
506	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
507	    "FileHeader", &scn)) != -1) {
508#define HWP5_SIGNATURE "HWP Document File"
509		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
510		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
511		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
512		    if (NOTMIME(ms)) {
513			if (file_printf(ms,
514			    "Hangul (Korean) Word Processor File 5.x") == -1)
515			    return -1;
516		    } else {
517			if (file_printf(ms, "application/x-hwp") == -1)
518			    return -1;
519		    }
520		    i = 1;
521		    goto out5;
522		} else {
523		    free(scn.sst_tab);
524		    scn.sst_tab = NULL;
525		    scn.sst_len = 0;
526		    scn.sst_dirlen = 0;
527		}
528	}
529
530        if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
531            &scn)) == -1) {
532                if (errno != ESRCH) {
533                        expn = "Cannot read summary info";
534			goto out4;
535		}
536		i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
537		    &dir, &scn);
538		if (i > 0)
539			goto out4;
540		i = cdf_file_dir_info(ms, &dir);
541		if (i < 0)
542                        expn = "Cannot read section info";
543		goto out4;
544	}
545
546
547#ifdef CDF_DEBUG
548        cdf_dump_summary_info(&h, &scn);
549#endif
550        if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
551            expn = "Can't expand summary_info";
552
553	if (i == 0) {
554		const char *str = NULL;
555		cdf_directory_t *d;
556		char name[__arraycount(d->d_name)];
557		size_t j, k;
558
559		for (j = 0; str == NULL && j < dir.dir_len; j++) {
560			d = &dir.dir_tab[j];
561			for (k = 0; k < sizeof(name); k++)
562				name[k] = (char)cdf_tole2(d->d_name[k]);
563			str = cdf_app_to_mime(name,
564			    NOTMIME(ms) ? name2desc : name2mime);
565		}
566		if (NOTMIME(ms)) {
567			if (str != NULL) {
568				if (file_printf(ms, "%s", str) == -1)
569					return -1;
570				i = 1;
571			}
572		} else {
573			if (str == NULL)
574				str = "vnd.ms-office";
575			if (file_printf(ms, "application/%s", str) == -1)
576				return -1;
577			i = 1;
578		}
579	}
580out5:
581        free(scn.sst_tab);
582out4:
583        free(sst.sst_tab);
584out3:
585        free(dir.dir_tab);
586out2:
587        free(ssat.sat_tab);
588out1:
589        free(sat.sat_tab);
590out0:
591	if (i == -1) {
592	    if (NOTMIME(ms)) {
593		if (file_printf(ms,
594		    "Composite Document File V2 Document") == -1)
595		    return -1;
596		if (*expn)
597		    if (file_printf(ms, ", %s", expn) == -1)
598			return -1;
599	    } else {
600		if (file_printf(ms, "application/CDFV2-unknown") == -1)
601		    return -1;
602	    }
603	    i = 1;
604	}
605        return i;
606}
607