readcdf.c revision 300899
1/*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "file.h"
27
28#ifndef lint
29FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $")
30#endif
31
32#include <assert.h>
33#include <stdlib.h>
34#include <unistd.h>
35#include <string.h>
36#include <time.h>
37#include <ctype.h>
38
39#include "cdf.h"
40#include "magic.h"
41
#ifndef __arraycount
/*
 * Number of elements in the array `a'.  Only meaningful for true arrays;
 * the argument is parenthesized so that any array-valued expression may be
 * passed safely.
 */
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when none of the MAGIC_MIME bits are set in ms->flags. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47
/*
 * Pattern -> value pair.  Patterns are searched for as case-insensitive
 * substrings (see cdf_app_to_mime()); every table ends with a NULL pattern.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* "Name of Application" property value -> MIME subtype. */
static const struct nv app2mime[] = {
	{ "Word", "msword" },
	{ "Excel", "vnd.ms-excel" },
	{ "Powerpoint", "vnd.ms-powerpoint" },
	{ "Crystal Reports", "x-rpt" },
	{ "Advanced Installer", "vnd.ms-msi" },
	{ "InstallShield", "vnd.ms-msi" },
	{ "Microsoft Patch Compiler", "vnd.ms-msi" },
	{ "NAnt", "vnd.ms-msi" },
	{ "Windows Installer", "vnd.ms-msi" },
	{ NULL, NULL }
};

/* Directory entry name -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "Book", "vnd.ms-excel" },
	{ "Workbook", "vnd.ms-excel" },
	{ "WordDocument", "msword" },
	{ "PowerPoint", "vnd.ms-powerpoint" },
	{ "DigitalSignature", "vnd.ms-msi" },
	{ NULL, NULL }
};

/*
 * Directory entry name -> human readable description (stored in the
 * `mime' member).
 */
static const struct nv name2desc[] = {
	{ "Book", "Microsoft Excel" },
	{ "Workbook", "Microsoft Excel" },
	{ "WordDocument", "Microsoft Word" },
	{ "PowerPoint", "Microsoft PowerPoint" },
	{ "DigitalSignature", "Microsoft Installer" },
	{ NULL, NULL }
};
77
/*
 * 128-bit class id, held as two 64-bit words, paired with a value.
 * Every table ends with an entry whose `mime' member is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class id -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL }
};

/* Class id -> human readable description (stored in `mime'). */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL }
};
98
99private const char *
100cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
101{
102	size_t i;
103	for (i = 0; cv[i].mime != NULL; i++) {
104		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
105			return cv[i].mime;
106	}
107#ifdef CDF_DEBUG
108	fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
109	    clsid[1]);
110#endif
111	return NULL;
112}
113
114private const char *
115cdf_app_to_mime(const char *vbuf, const struct nv *nv)
116{
117	size_t i;
118	const char *rv = NULL;
119#ifdef USE_C_LOCALE
120	locale_t old_lc_ctype, c_lc_ctype;
121
122	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
123	assert(c_lc_ctype != NULL);
124	old_lc_ctype = uselocale(c_lc_ctype);
125	assert(old_lc_ctype != NULL);
126#else
127	char *old_lc_ctype = setlocale(LC_CTYPE, "C");
128#endif
129	for (i = 0; nv[i].pattern != NULL; i++)
130		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
131			rv = nv[i].mime;
132			break;
133		}
134#ifdef CDF_DEBUG
135	fprintf(stderr, "unknown app %s\n", vbuf);
136#endif
137#ifdef USE_C_LOCALE
138	(void)uselocale(old_lc_ctype);
139	freelocale(c_lc_ctype);
140#else
141	setlocale(LC_CTYPE, old_lc_ctype);
142#endif
143	return rv;
144}
145
146private int
147cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
148    size_t count, const cdf_directory_t *root_storage)
149{
150        size_t i;
151        cdf_timestamp_t tp;
152        struct timespec ts;
153        char buf[64];
154        const char *str = NULL;
155        const char *s;
156        int len;
157
158        if (!NOTMIME(ms) && root_storage)
159		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
160		    clsid2mime);
161
162        for (i = 0; i < count; i++) {
163                cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
164                switch (info[i].pi_type) {
165                case CDF_NULL:
166                        break;
167                case CDF_SIGNED16:
168                        if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
169                            info[i].pi_s16) == -1)
170                                return -1;
171                        break;
172                case CDF_SIGNED32:
173                        if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
174                            info[i].pi_s32) == -1)
175                                return -1;
176                        break;
177                case CDF_UNSIGNED32:
178                        if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
179                            info[i].pi_u32) == -1)
180                                return -1;
181                        break;
182                case CDF_FLOAT:
183                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
184                            info[i].pi_f) == -1)
185                                return -1;
186                        break;
187                case CDF_DOUBLE:
188                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
189                            info[i].pi_d) == -1)
190                                return -1;
191                        break;
192                case CDF_LENGTH32_STRING:
193                case CDF_LENGTH32_WSTRING:
194                        len = info[i].pi_str.s_len;
195                        if (len > 1) {
196                                char vbuf[1024];
197                                size_t j, k = 1;
198
199                                if (info[i].pi_type == CDF_LENGTH32_WSTRING)
200                                    k++;
201                                s = info[i].pi_str.s_buf;
202                                for (j = 0; j < sizeof(vbuf) && len--; s += k) {
203                                        if (*s == '\0')
204                                                break;
205                                        if (isprint((unsigned char)*s))
206                                                vbuf[j++] = *s;
207                                }
208                                if (j == sizeof(vbuf))
209                                        --j;
210                                vbuf[j] = '\0';
211                                if (NOTMIME(ms)) {
212                                        if (vbuf[0]) {
213                                                if (file_printf(ms, ", %s: %s",
214                                                    buf, vbuf) == -1)
215                                                        return -1;
216                                        }
217                                } else if (str == NULL && info[i].pi_id ==
218				    CDF_PROPERTY_NAME_OF_APPLICATION) {
219					str = cdf_app_to_mime(vbuf, app2mime);
220				}
221			}
222                        break;
223                case CDF_FILETIME:
224                        tp = info[i].pi_tp;
225                        if (tp != 0) {
226				char tbuf[64];
227                                if (tp < 1000000000000000LL) {
228                                        cdf_print_elapsed_time(tbuf,
229                                            sizeof(tbuf), tp);
230                                        if (NOTMIME(ms) && file_printf(ms,
231                                            ", %s: %s", buf, tbuf) == -1)
232                                                return -1;
233                                } else {
234                                        char *c, *ec;
235                                        cdf_timestamp_to_timespec(&ts, tp);
236                                        c = cdf_ctime(&ts.tv_sec, tbuf);
237                                        if (c != NULL &&
238					    (ec = strchr(c, '\n')) != NULL)
239						*ec = '\0';
240
241                                        if (NOTMIME(ms) && file_printf(ms,
242                                            ", %s: %s", buf, c) == -1)
243                                                return -1;
244                                }
245                        }
246                        break;
247                case CDF_CLIPBOARD:
248                        break;
249                default:
250                        return -1;
251                }
252        }
253        if (!NOTMIME(ms)) {
254		if (str == NULL)
255			return 0;
256                if (file_printf(ms, "application/%s", str) == -1)
257                        return -1;
258        }
259        return 1;
260}
261
262private int
263cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
264    const cdf_stream_t *sst)
265{
266	cdf_catalog_t *cat;
267	size_t i;
268	char buf[256];
269	cdf_catalog_entry_t *ce;
270
271        if (NOTMIME(ms)) {
272		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
273			return -1;
274		if (cdf_unpack_catalog(h, sst, &cat) == -1)
275			return -1;
276		ce = cat->cat_e;
277		/* skip first entry since it has a , or paren */
278		for (i = 1; i < cat->cat_num; i++)
279			if (file_printf(ms, "%s%s",
280			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
281			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
282				free(cat);
283				return -1;
284			}
285		free(cat);
286	} else {
287		if (file_printf(ms, "application/CDFV2") == -1)
288			return -1;
289	}
290	return 1;
291}
292
293private int
294cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
295    const cdf_stream_t *sst, const cdf_directory_t *root_storage)
296{
297        cdf_summary_info_header_t si;
298        cdf_property_info_t *info;
299        size_t count;
300        int m;
301
302        if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
303                return -1;
304
305        if (NOTMIME(ms)) {
306		const char *str;
307
308                if (file_printf(ms, "Composite Document File V2 Document")
309		    == -1)
310                        return -1;
311
312                if (file_printf(ms, ", %s Endian",
313                    si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
314                        return -2;
315                switch (si.si_os) {
316                case 2:
317                        if (file_printf(ms, ", Os: Windows, Version %d.%d",
318                            si.si_os_version & 0xff,
319                            (uint32_t)si.si_os_version >> 8) == -1)
320                                return -2;
321                        break;
322                case 1:
323                        if (file_printf(ms, ", Os: MacOS, Version %d.%d",
324                            (uint32_t)si.si_os_version >> 8,
325                            si.si_os_version & 0xff) == -1)
326                                return -2;
327                        break;
328                default:
329                        if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
330                            si.si_os_version & 0xff,
331                            (uint32_t)si.si_os_version >> 8) == -1)
332                                return -2;
333                        break;
334                }
335		if (root_storage) {
336			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
337			    clsid2desc);
338			if (str) {
339				if (file_printf(ms, ", %s", str) == -1)
340					return -2;
341			}
342		}
343	}
344
345        m = cdf_file_property_info(ms, info, count, root_storage);
346        free(info);
347
348        return m == -1 ? -2 : m;
349}
350
#ifdef notdef
/*
 * Format the 128-bit class id `uuid' into `buf' (of size `len') as five
 * dash separated, zero padded lower case hex fields of 8, 4, 4, 4 and 12
 * digits: the top 32 bits of uuid[0], its next two 16-bit groups, the top
 * 16 bits of uuid[1] and finally its low 48 bits.  Returns `buf'.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    /* 48 bits only: bits 48-63 belong to the previous field */
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
364
365private int
366cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
367    const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
368    const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
369{
370	int i;
371
372	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
373	    dir, "Catalog", scn)) == -1)
374		return i;
375#ifdef CDF_DEBUG
376	cdf_dump_catalog(&h, scn);
377#endif
378	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
379		return -1;
380	return i;
381}
382
383private struct sinfo {
384	const char *name;
385	const char *mime;
386	const char *sections[5];
387	const int  types[5];
388} sectioninfo[] = {
389	{ "Encrypted", "encrypted",
390		{
391			"EncryptedPackage", NULL, NULL, NULL, NULL,
392		},
393		{
394			CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
395
396		},
397	},
398	{ "QuickBooks", "quickbooks",
399		{
400#if 0
401			"TaxForms", "PDFTaxForms", "modulesInBackup",
402#endif
403			"mfbu_header", NULL, NULL, NULL, NULL,
404		},
405		{
406#if 0
407			CDF_DIR_TYPE_USER_STORAGE,
408			CDF_DIR_TYPE_USER_STORAGE,
409			CDF_DIR_TYPE_USER_STREAM,
410#endif
411			CDF_DIR_TYPE_USER_STREAM,
412			0, 0, 0, 0
413		},
414	},
415};
416
417private int
418cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
419{
420	size_t sd, j;
421
422	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
423		const struct sinfo *si = &sectioninfo[sd];
424		for (j = 0; si->sections[j]; j++) {
425			if (cdf_find_stream(dir, si->sections[j], si->types[j])
426			    <= 0) {
427#ifdef CDF_DEBUG
428				fprintf(stderr, "Can't read %s\n",
429				    si->sections[j]);
430#endif
431				break;
432			}
433		}
434		if (si->sections[j] != NULL)
435			continue;
436		if (NOTMIME(ms)) {
437			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
438				return -1;
439		} else {
440			if (file_printf(ms, "application/CDFV2-%s",
441			    si->mime) == -1)
442				return -1;
443		}
444		return 1;
445	}
446	return -1;
447}
448
449protected int
450file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
451    size_t nbytes)
452{
453        cdf_info_t info;
454        cdf_header_t h;
455        cdf_sat_t sat, ssat;
456        cdf_stream_t sst, scn;
457        cdf_dir_t dir;
458        int i;
459        const char *expn = "";
460        const cdf_directory_t *root_storage;
461
462        info.i_fd = fd;
463        info.i_buf = buf;
464        info.i_len = nbytes;
465        if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
466                return 0;
467        if (cdf_read_header(&info, &h) == -1)
468                return 0;
469#ifdef CDF_DEBUG
470        cdf_dump_header(&h);
471#endif
472
473        if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
474                expn = "Can't read SAT";
475                goto out0;
476        }
477#ifdef CDF_DEBUG
478        cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
479#endif
480
481        if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
482                expn = "Can't read SSAT";
483                goto out1;
484        }
485#ifdef CDF_DEBUG
486        cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
487#endif
488
489        if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
490                expn = "Can't read directory";
491                goto out2;
492        }
493
494        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
495	    &root_storage)) == -1) {
496                expn = "Cannot read short stream";
497                goto out3;
498        }
499#ifdef CDF_DEBUG
500        cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
501#endif
502#ifdef notdef
503	if (root_storage) {
504		if (NOTMIME(ms)) {
505			char clsbuf[128];
506			if (file_printf(ms, "CLSID %s, ",
507			    format_clsid(clsbuf, sizeof(clsbuf),
508			    root_storage->d_storage_uuid)) == -1)
509				return -1;
510		}
511	}
512#endif
513
514	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
515	    "FileHeader", &scn)) != -1) {
516#define HWP5_SIGNATURE "HWP Document File"
517		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
518		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
519		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
520		    if (NOTMIME(ms)) {
521			if (file_printf(ms,
522			    "Hangul (Korean) Word Processor File 5.x") == -1)
523			    return -1;
524		    } else {
525			if (file_printf(ms, "application/x-hwp") == -1)
526			    return -1;
527		    }
528		    i = 1;
529		    goto out5;
530		} else {
531		    free(scn.sst_tab);
532		    scn.sst_tab = NULL;
533		    scn.sst_len = 0;
534		    scn.sst_dirlen = 0;
535		}
536	}
537
538        if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
539            &scn)) == -1) {
540                if (errno != ESRCH) {
541                        expn = "Cannot read summary info";
542			goto out4;
543		}
544		i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
545		    &dir, &scn);
546		if (i > 0)
547			goto out4;
548		i = cdf_file_dir_info(ms, &dir);
549		if (i < 0)
550                        expn = "Cannot read section info";
551		goto out4;
552	}
553
554
555#ifdef CDF_DEBUG
556        cdf_dump_summary_info(&h, &scn);
557#endif
558        if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
559            expn = "Can't expand summary_info";
560
561	if (i == 0) {
562		const char *str = NULL;
563		cdf_directory_t *d;
564		char name[__arraycount(d->d_name)];
565		size_t j, k;
566
567		for (j = 0; str == NULL && j < dir.dir_len; j++) {
568			d = &dir.dir_tab[j];
569			for (k = 0; k < sizeof(name); k++)
570				name[k] = (char)cdf_tole2(d->d_name[k]);
571			str = cdf_app_to_mime(name,
572			    NOTMIME(ms) ? name2desc : name2mime);
573		}
574		if (NOTMIME(ms)) {
575			if (str != NULL) {
576				if (file_printf(ms, "%s", str) == -1)
577					return -1;
578				i = 1;
579			}
580		} else {
581			if (str == NULL)
582				str = "vnd.ms-office";
583			if (file_printf(ms, "application/%s", str) == -1)
584				return -1;
585			i = 1;
586		}
587	}
588out5:
589        free(scn.sst_tab);
590out4:
591        free(sst.sst_tab);
592out3:
593        free(dir.dir_tab);
594out2:
595        free(ssat.sat_tab);
596out1:
597        free(sat.sat_tab);
598out0:
599	if (i == -1) {
600	    if (NOTMIME(ms)) {
601		if (file_printf(ms,
602		    "Composite Document File V2 Document") == -1)
603		    return -1;
604		if (*expn)
605		    if (file_printf(ms, ", %s", expn) == -1)
606			return -1;
607	    } else {
608		if (file_printf(ms, "application/CDFV2-unknown") == -1)
609		    return -1;
610	    }
611	    i = 1;
612	}
613        return i;
614}
615