1/*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26/*
27 * Parse Composite Document Files, the format used in Microsoft Office
28 * document files before they switched to zipped XML.
29 * Info from: http://sc.openoffice.org/compdocfileformat.pdf
30 *
31 * N.B. This is the "Composite Document File" format, and not the
32 * "Compound Document Format", nor the "Channel Definition Format".
33 */
34
35#include "file.h"
36
37#ifndef lint
38FILE_RCSID("@(#)$File: cdf.c,v 1.116 2019/08/26 14:31:39 christos Exp $")
39#endif
40
41#include <assert.h>
42#ifdef CDF_DEBUG
43#include <err.h>
44#endif
45#include <stdlib.h>
46#include <unistd.h>
47#include <string.h>
48#include <time.h>
49#include <ctype.h>
50#include <limits.h>
51
52#ifndef EFTYPE
53#define EFTYPE EINVAL
54#endif
55
56#ifndef SIZE_T_MAX
57#define SIZE_T_MAX CAST(size_t, ~0ULL)
58#endif
59
60#include "cdf.h"
61
62#ifdef CDF_DEBUG
63#define DPRINTF(a) printf a, fflush(stdout)
64#else
65#define DPRINTF(a)
66#endif
67
/*
 * Host byte-order probe.  cdf_read_header() stores the bytes 1, 2, 3, 4
 * into cdf_bo.s; reading them back through cdf_bo.u gives 0x01020304 only
 * when the first byte in memory is the most significant one.
 */
static union {
	char s[4];
	uint32_t u;
} cdf_bo;

/* True when the probe above reads back as 0x01020304. */
#define NEED_SWAP	(cdf_bo.u == CAST(uint32_t, 0x01020304))

/*
 * Fixed-width conversions: byte-swap the argument when NEED_SWAP is true,
 * otherwise pass it through unchanged.
 */
#define CDF_TOLE8(x)	\
    (CAST(uint64_t, NEED_SWAP ? _cdf_tole8(x) : CAST(uint64_t, x)))
#define CDF_TOLE4(x)	\
    (CAST(uint32_t, NEED_SWAP ? _cdf_tole4(x) : CAST(uint32_t, x)))
#define CDF_TOLE2(x)	\
    (CAST(uint16_t, NEED_SWAP ? _cdf_tole2(x) : CAST(uint16_t, x)))
/* Pick the 2-, 4- or 8-byte conversion from sizeof(x). */
#define CDF_TOLE(x)	(/*CONSTCOND*/sizeof(x) == 2 ? \
			    CDF_TOLE2(CAST(uint16_t, x)) : \
			(/*CONSTCOND*/sizeof(x) == 4 ? \
			    CDF_TOLE4(CAST(uint32_t, x)) : \
			    CDF_TOLE8(CAST(uint64_t, x))))
/* Fetch 32-bit word number y counted from byte pointer x. */
#define CDF_GETUINT32(x, y)	cdf_getuint32(x, y)

/* Allocation wrappers that hand the call site to the traced allocators. */
#define CDF_MALLOC(n) cdf_malloc(__FILE__, __LINE__, (n))
#define CDF_REALLOC(p, n) cdf_realloc(__FILE__, __LINE__, (p), (n))
#define CDF_CALLOC(n, u) cdf_calloc(__FILE__, __LINE__, (n), (u))
91
92
93/*ARGSUSED*/
94static void *
95cdf_malloc(const char *file __attribute__((__unused__)),
96    size_t line __attribute__((__unused__)), size_t n)
97{
98	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u\n",
99	    file, line, __func__, n));
100	return malloc(n);
101}
102
103/*ARGSUSED*/
104static void *
105cdf_realloc(const char *file __attribute__((__unused__)),
106    size_t line __attribute__((__unused__)), void *p, size_t n)
107{
108	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u\n",
109	    file, line, __func__, n));
110	return realloc(p, n);
111}
112
113/*ARGSUSED*/
114static void *
115cdf_calloc(const char *file __attribute__((__unused__)),
116    size_t line __attribute__((__unused__)), size_t n, size_t u)
117{
118	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u %"
119	    SIZE_T_FORMAT "u\n", file, line, __func__, n, u));
120	return calloc(n, u);
121}
122
/*
 * Reverse the byte order of a 16-bit value.
 */
static uint16_t
_cdf_tole2(uint16_t sv)
{
	uint8_t in[sizeof(sv)], out[sizeof(sv)];
	size_t k;

	(void)memcpy(in, &sv, sizeof(in));
	for (k = 0; k < sizeof(in); k++)
		out[k] = in[sizeof(in) - 1 - k];
	(void)memcpy(&sv, out, sizeof(sv));
	return sv;
}
136
/*
 * Reverse the byte order of a 32-bit value.
 */
static uint32_t
_cdf_tole4(uint32_t sv)
{
	uint8_t in[sizeof(sv)], out[sizeof(sv)];
	size_t k;

	(void)memcpy(in, &sv, sizeof(in));
	for (k = 0; k < sizeof(in); k++)
		out[k] = in[sizeof(in) - 1 - k];
	(void)memcpy(&sv, out, sizeof(sv));
	return sv;
}
152
/*
 * Reverse the byte order of a 64-bit value.
 */
static uint64_t
_cdf_tole8(uint64_t sv)
{
	uint8_t in[sizeof(sv)], out[sizeof(sv)];
	size_t k;

	(void)memcpy(in, &sv, sizeof(in));
	for (k = 0; k < sizeof(in); k++)
		out[k] = in[sizeof(in) - 1 - k];
	(void)memcpy(&sv, out, sizeof(sv));
	return sv;
}
172
/*
 * Fetch a 32-bit value from a buffer that need not be aligned and convert
 * it with CDF_TOLE4.  offs counts 32-bit words from p, not bytes.
 */
static uint32_t
cdf_getuint32(const uint8_t *p, size_t offs)
{
	const uint8_t *src = p + offs * sizeof(uint32_t);
	uint32_t raw;

	/* memcpy keeps the access legal at any alignment */
	(void)memcpy(&raw, src, sizeof(raw));
	return CDF_TOLE4(raw);
}
184
/*
 * Sequential unpack helpers.  Both expect a byte buffer named `buf' and a
 * running offset named `len' in the calling scope: they copy sizeof(a)
 * bytes from &buf[len] and then advance len by that amount.
 * CDF_UNPACK takes an lvalue (its address is used as the destination);
 * CDF_UNPACKA takes an array, which is used as the destination directly.
 */
#define CDF_UNPACK(a)	\
    (void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a)
#define CDF_UNPACKA(a)	\
    (void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a)
189
190uint16_t
191cdf_tole2(uint16_t sv)
192{
193	return CDF_TOLE2(sv);
194}
195
196uint32_t
197cdf_tole4(uint32_t sv)
198{
199	return CDF_TOLE4(sv);
200}
201
202uint64_t
203cdf_tole8(uint64_t sv)
204{
205	return CDF_TOLE8(sv);
206}
207
208void
209cdf_swap_header(cdf_header_t *h)
210{
211	size_t i;
212
213	h->h_magic = CDF_TOLE8(h->h_magic);
214	h->h_uuid[0] = CDF_TOLE8(h->h_uuid[0]);
215	h->h_uuid[1] = CDF_TOLE8(h->h_uuid[1]);
216	h->h_revision = CDF_TOLE2(h->h_revision);
217	h->h_version = CDF_TOLE2(h->h_version);
218	h->h_byte_order = CDF_TOLE2(h->h_byte_order);
219	h->h_sec_size_p2 = CDF_TOLE2(h->h_sec_size_p2);
220	h->h_short_sec_size_p2 = CDF_TOLE2(h->h_short_sec_size_p2);
221	h->h_num_sectors_in_sat = CDF_TOLE4(h->h_num_sectors_in_sat);
222	h->h_secid_first_directory = CDF_TOLE4(h->h_secid_first_directory);
223	h->h_min_size_standard_stream =
224	    CDF_TOLE4(h->h_min_size_standard_stream);
225	h->h_secid_first_sector_in_short_sat =
226	    CDF_TOLE4(CAST(uint32_t, h->h_secid_first_sector_in_short_sat));
227	h->h_num_sectors_in_short_sat =
228	    CDF_TOLE4(h->h_num_sectors_in_short_sat);
229	h->h_secid_first_sector_in_master_sat =
230	    CDF_TOLE4(CAST(uint32_t, h->h_secid_first_sector_in_master_sat));
231	h->h_num_sectors_in_master_sat =
232	    CDF_TOLE4(h->h_num_sectors_in_master_sat);
233	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
234		h->h_master_sat[i] =
235		    CDF_TOLE4(CAST(uint32_t, h->h_master_sat[i]));
236	}
237}
238
/*
 * Fill the header structure h from the packed bytes in buf.
 *
 * Each CDF_UNPACK/CDF_UNPACKA copies sizeof(field) bytes from buf at the
 * running offset `len' and advances it, so the order of the statements
 * below is the field order inside buf and must not be changed.  No byte
 * swapping is done here; cdf_swap_header() does that.
 */
void
cdf_unpack_header(cdf_header_t *h, char *buf)
{
	size_t i;
	size_t len = 0;		/* running offset, advanced by the macros */

	CDF_UNPACK(h->h_magic);
	CDF_UNPACKA(h->h_uuid);
	CDF_UNPACK(h->h_revision);
	CDF_UNPACK(h->h_version);
	CDF_UNPACK(h->h_byte_order);
	CDF_UNPACK(h->h_sec_size_p2);
	CDF_UNPACK(h->h_short_sec_size_p2);
	CDF_UNPACKA(h->h_unused0);
	CDF_UNPACK(h->h_num_sectors_in_sat);
	CDF_UNPACK(h->h_secid_first_directory);
	CDF_UNPACKA(h->h_unused1);
	CDF_UNPACK(h->h_min_size_standard_stream);
	CDF_UNPACK(h->h_secid_first_sector_in_short_sat);
	CDF_UNPACK(h->h_num_sectors_in_short_sat);
	CDF_UNPACK(h->h_secid_first_sector_in_master_sat);
	CDF_UNPACK(h->h_num_sectors_in_master_sat);
	/* the remaining bytes are the master SAT entries, one by one */
	for (i = 0; i < __arraycount(h->h_master_sat); i++)
		CDF_UNPACK(h->h_master_sat[i]);
}
264
265void
266cdf_swap_dir(cdf_directory_t *d)
267{
268	d->d_namelen = CDF_TOLE2(d->d_namelen);
269	d->d_left_child = CDF_TOLE4(CAST(uint32_t, d->d_left_child));
270	d->d_right_child = CDF_TOLE4(CAST(uint32_t, d->d_right_child));
271	d->d_storage = CDF_TOLE4(CAST(uint32_t, d->d_storage));
272	d->d_storage_uuid[0] = CDF_TOLE8(d->d_storage_uuid[0]);
273	d->d_storage_uuid[1] = CDF_TOLE8(d->d_storage_uuid[1]);
274	d->d_flags = CDF_TOLE4(d->d_flags);
275	d->d_created = CDF_TOLE8(CAST(uint64_t, d->d_created));
276	d->d_modified = CDF_TOLE8(CAST(uint64_t, d->d_modified));
277	d->d_stream_first_sector = CDF_TOLE4(
278	    CAST(uint32_t, d->d_stream_first_sector));
279	d->d_size = CDF_TOLE4(d->d_size);
280}
281
282void
283cdf_swap_class(cdf_classid_t *d)
284{
285	d->cl_dword = CDF_TOLE4(d->cl_dword);
286	d->cl_word[0] = CDF_TOLE2(d->cl_word[0]);
287	d->cl_word[1] = CDF_TOLE2(d->cl_word[1]);
288}
289
/*
 * Fill the directory entry d from the packed bytes in buf.
 *
 * Each CDF_UNPACK/CDF_UNPACKA copies sizeof(field) bytes from buf at the
 * running offset `len' and advances it, so the statement order below is
 * the field order inside buf and must not be changed.  No byte swapping
 * is done here; cdf_swap_dir() does that.
 */
void
cdf_unpack_dir(cdf_directory_t *d, char *buf)
{
	size_t len = 0;		/* running offset, advanced by the macros */

	CDF_UNPACKA(d->d_name);
	CDF_UNPACK(d->d_namelen);
	CDF_UNPACK(d->d_type);
	CDF_UNPACK(d->d_color);
	CDF_UNPACK(d->d_left_child);
	CDF_UNPACK(d->d_right_child);
	CDF_UNPACK(d->d_storage);
	CDF_UNPACKA(d->d_storage_uuid);
	CDF_UNPACK(d->d_flags);
	CDF_UNPACK(d->d_created);
	CDF_UNPACK(d->d_modified);
	CDF_UNPACK(d->d_stream_first_sector);
	CDF_UNPACK(d->d_size);
	CDF_UNPACK(d->d_unused0);
}
310
311int
312cdf_zero_stream(cdf_stream_t *scn)
313{
314	scn->sst_len = 0;
315	scn->sst_dirlen = 0;
316	scn->sst_ss = 0;
317	free(scn->sst_tab);
318	scn->sst_tab = NULL;
319	return -1;
320}
321
322static size_t
323cdf_check_stream(const cdf_stream_t *sst, const cdf_header_t *h)
324{
325	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
326	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
327	assert(ss == sst->sst_ss);
328	return sst->sst_ss;
329}
330
331static int
332cdf_check_stream_offset(const cdf_stream_t *sst, const cdf_header_t *h,
333    const void *p, size_t tail, int line)
334{
335	const char *b = RCAST(const char *, sst->sst_tab);
336	const char *e = RCAST(const char *, p) + tail;
337	size_t ss = cdf_check_stream(sst, h);
338	/*LINTED*/(void)&line;
339	if (e >= b && CAST(size_t, e - b) <= ss * sst->sst_len)
340		return 0;
341	DPRINTF(("%d: offset begin %p < end %p || %" SIZE_T_FORMAT "u"
342	    " > %" SIZE_T_FORMAT "u [%" SIZE_T_FORMAT "u %"
343	    SIZE_T_FORMAT "u]\n", line, b, e, (size_t)(e - b),
344	    ss * sst->sst_len, ss, sst->sst_len));
345	errno = EFTYPE;
346	return -1;
347}
348
349static ssize_t
350cdf_read(const cdf_info_t *info, off_t off, void *buf, size_t len)
351{
352	size_t siz = CAST(size_t, off + len);
353
354	if (CAST(off_t, off + len) != CAST(off_t, siz))
355		goto out;
356
357	if (info->i_buf != NULL && info->i_len >= siz) {
358		(void)memcpy(buf, &info->i_buf[off], len);
359		return CAST(ssize_t, len);
360	}
361
362	if (info->i_fd == -1)
363		goto out;
364
365	if (pread(info->i_fd, buf, len, off) != CAST(ssize_t, len))
366		return -1;
367
368	return CAST(ssize_t, len);
369out:
370	errno = EINVAL;
371	return -1;
372}
373
374int
375cdf_read_header(const cdf_info_t *info, cdf_header_t *h)
376{
377	char buf[512];
378
379	(void)memcpy(cdf_bo.s, "\01\02\03\04", 4);
380	if (cdf_read(info, CAST(off_t, 0), buf, sizeof(buf)) == -1)
381		return -1;
382	cdf_unpack_header(h, buf);
383	cdf_swap_header(h);
384	if (h->h_magic != CDF_MAGIC) {
385		DPRINTF(("Bad magic %#" INT64_T_FORMAT "x != %#"
386		    INT64_T_FORMAT "x\n",
387		    (unsigned long long)h->h_magic,
388		    (unsigned long long)CDF_MAGIC));
389		goto out;
390	}
391	if (h->h_sec_size_p2 > 20) {
392		DPRINTF(("Bad sector size %hu\n", h->h_sec_size_p2));
393		goto out;
394	}
395	if (h->h_short_sec_size_p2 > 20) {
396		DPRINTF(("Bad short sector size %hu\n",
397		    h->h_short_sec_size_p2));
398		goto out;
399	}
400	return 0;
401out:
402	errno = EFTYPE;
403	return -1;
404}
405
406
407ssize_t
408cdf_read_sector(const cdf_info_t *info, void *buf, size_t offs, size_t len,
409    const cdf_header_t *h, cdf_secid_t id)
410{
411	size_t ss = CDF_SEC_SIZE(h);
412	size_t pos;
413
414	if (SIZE_T_MAX / ss < CAST(size_t, id))
415		return -1;
416
417	pos = CDF_SEC_POS(h, id);
418	assert(ss == len);
419	return cdf_read(info, CAST(off_t, pos), RCAST(char *, buf) + offs, len);
420}
421
422ssize_t
423cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs,
424    size_t len, const cdf_header_t *h, cdf_secid_t id)
425{
426	size_t ss = CDF_SHORT_SEC_SIZE(h);
427	size_t pos;
428
429	if (SIZE_T_MAX / ss < CAST(size_t, id))
430		return -1;
431
432	pos = CDF_SHORT_SEC_POS(h, id);
433	assert(ss == len);
434	if (pos + len > CDF_SEC_SIZE(h) * sst->sst_len) {
435		DPRINTF(("Out of bounds read %" SIZE_T_FORMAT "u > %"
436		    SIZE_T_FORMAT "u\n",
437		    pos + len, CDF_SEC_SIZE(h) * sst->sst_len));
438		goto out;
439	}
440	(void)memcpy(RCAST(char *, buf) + offs,
441	    RCAST(const char *, sst->sst_tab) + pos, len);
442	return len;
443out:
444	errno = EFTYPE;
445	return -1;
446}
447
/*
 * Read the sector allocation table.
 *
 * The table is assembled in sat->sat_tab, one sector-sized slot per SAT
 * sector: first the sectors listed in the header's h_master_sat array,
 * then the sectors listed in the chain of master SAT sectors that starts
 * at h_secid_first_sector_in_master_sat.  On success sat->sat_len is the
 * number of slots actually filled and 0 is returned.
 *
 * On failure -1 is returned and sat->sat_tab has been freed (the pointer
 * itself is not cleared).  errno is EFTYPE for limit/bounds violations;
 * for read and allocation failures it is whatever the failing call left.
 */
int
cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat)
{
	size_t i, j, k;
	size_t ss = CDF_SEC_SIZE(h);
	cdf_secid_t *msa, mid, sec;
	/* ids per master SAT sector; the last slot links to the next one */
	size_t nsatpersec = (ss / sizeof(mid)) - 1;

	/* count the header entries in front of the first free marker */
	for (i = 0; i < __arraycount(h->h_master_sat); i++)
		if (h->h_master_sat[i] == CDF_SECID_FREE)
			break;

	/* cap the table size before computing it, to avoid huge allocations */
#define CDF_SEC_LIMIT (UINT32_MAX / (64 * ss))
	if ((nsatpersec > 0 &&
	    h->h_num_sectors_in_master_sat > CDF_SEC_LIMIT / nsatpersec) ||
	    i > CDF_SEC_LIMIT) {
		DPRINTF(("Number of sectors in master SAT too big %u %"
		    SIZE_T_FORMAT "u\n", h->h_num_sectors_in_master_sat, i));
		errno = EFTYPE;
		return -1;
	}

	/* upper bound on the number of slots; trimmed at `out' below */
	sat->sat_len = h->h_num_sectors_in_master_sat * nsatpersec + i;
	DPRINTF(("sat_len = %" SIZE_T_FORMAT "u ss = %" SIZE_T_FORMAT "u\n",
	    sat->sat_len, ss));
	if ((sat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(sat->sat_len, ss)))
	    == NULL)
		return -1;

	/* slots named directly in the header; stop at any negative id */
	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
		if (h->h_master_sat[i] < 0)
			break;
		if (cdf_read_sector(info, sat->sat_tab, ss * i, ss, h,
		    h->h_master_sat[i]) != CAST(ssize_t, ss)) {
			DPRINTF(("Reading sector %d", h->h_master_sat[i]));
			goto out1;
		}
	}

	/* scratch buffer holding one master SAT sector at a time */
	if ((msa = CAST(cdf_secid_t *, CDF_CALLOC(1, ss))) == NULL)
		goto out1;

	/* i carries on from the loop above as the next free slot */
	mid = h->h_secid_first_sector_in_master_sat;
	for (j = 0; j < h->h_num_sectors_in_master_sat; j++) {
		if (mid < 0)
			goto out;
		if (j >= CDF_LOOP_LIMIT) {
			DPRINTF(("Reading master sector loop limit"));
			goto out3;
		}
		if (cdf_read_sector(info, msa, 0, ss, h, mid) !=
		    CAST(ssize_t, ss)) {
			DPRINTF(("Reading master sector %d", mid));
			goto out2;
		}
		for (k = 0; k < nsatpersec; k++, i++) {
			sec = CDF_TOLE4(CAST(uint32_t, msa[k]));
			/* a negative id ends the list: finish successfully */
			if (sec < 0)
				goto out;
			if (i >= sat->sat_len) {
			    DPRINTF(("Out of bounds reading MSA %"
				SIZE_T_FORMAT "u >= %" SIZE_T_FORMAT "u",
				i, sat->sat_len));
			    goto out3;
			}
			if (cdf_read_sector(info, sat->sat_tab, ss * i, ss, h,
			    sec) != CAST(ssize_t, ss)) {
				DPRINTF(("Reading sector %d",
				    CDF_TOLE4(msa[k])));
				goto out2;
			}
		}
		/* the slot after the ids names the next master SAT sector */
		mid = CDF_TOLE4(CAST(uint32_t, msa[nsatpersec]));
	}
out:
	/* success: record how many slots were really filled */
	sat->sat_len = i;
	free(msa);
	return 0;
out3:
	errno = EFTYPE;
out2:
	free(msa);
out1:
	free(sat->sat_tab);
	return -1;
}
537
538size_t
539cdf_count_chain(const cdf_sat_t *sat, cdf_secid_t sid, size_t size)
540{
541	size_t i, j;
542	cdf_secid_t maxsector = CAST(cdf_secid_t, (sat->sat_len * size)
543	    / sizeof(maxsector));
544
545	DPRINTF(("Chain:"));
546	if (sid == CDF_SECID_END_OF_CHAIN) {
547		/* 0-length chain. */
548		DPRINTF((" empty\n"));
549		return 0;
550	}
551
552	for (j = i = 0; sid >= 0; i++, j++) {
553		DPRINTF((" %d", sid));
554		if (j >= CDF_LOOP_LIMIT) {
555			DPRINTF(("Counting chain loop limit"));
556			goto out;
557		}
558		if (sid >= maxsector) {
559			DPRINTF(("Sector %d >= %d\n", sid, maxsector));
560			goto out;
561		}
562		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
563	}
564	if (i == 0) {
565		DPRINTF((" none, sid: %d\n", sid));
566		goto out;
567
568	}
569	DPRINTF(("\n"));
570	return i;
571out:
572	errno = EFTYPE;
573	return CAST(size_t, -1);
574}
575
576int
577cdf_read_long_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
578    const cdf_sat_t *sat, cdf_secid_t sid, size_t len, cdf_stream_t *scn)
579{
580	size_t ss = CDF_SEC_SIZE(h), i, j;
581	ssize_t nr;
582	scn->sst_tab = NULL;
583	scn->sst_len = cdf_count_chain(sat, sid, ss);
584	scn->sst_dirlen = MAX(h->h_min_size_standard_stream, len);
585	scn->sst_ss = ss;
586
587	if (sid == CDF_SECID_END_OF_CHAIN || len == 0)
588		return cdf_zero_stream(scn);
589
590	if (scn->sst_len == CAST(size_t, -1))
591		goto out;
592
593	scn->sst_tab = CDF_CALLOC(scn->sst_len, ss);
594	if (scn->sst_tab == NULL)
595		return cdf_zero_stream(scn);
596
597	for (j = i = 0; sid >= 0; i++, j++) {
598		if (j >= CDF_LOOP_LIMIT) {
599			DPRINTF(("Read long sector chain loop limit"));
600			goto out;
601		}
602		if (i >= scn->sst_len) {
603			DPRINTF(("Out of bounds reading long sector chain "
604			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
605			    scn->sst_len));
606			goto out;
607		}
608		if ((nr = cdf_read_sector(info, scn->sst_tab, i * ss, ss, h,
609		    sid)) != CAST(ssize_t, ss)) {
610			if (i == scn->sst_len - 1 && nr > 0) {
611				/* Last sector might be truncated */
612				return 0;
613			}
614			DPRINTF(("Reading long sector chain %d", sid));
615			goto out;
616		}
617		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
618	}
619	return 0;
620out:
621	errno = EFTYPE;
622	return cdf_zero_stream(scn);
623}
624
625int
626cdf_read_short_sector_chain(const cdf_header_t *h,
627    const cdf_sat_t *ssat, const cdf_stream_t *sst,
628    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
629{
630	size_t ss = CDF_SHORT_SEC_SIZE(h), i, j;
631	scn->sst_tab = NULL;
632	scn->sst_len = cdf_count_chain(ssat, sid, CDF_SEC_SIZE(h));
633	scn->sst_dirlen = len;
634	scn->sst_ss = ss;
635
636	if (scn->sst_len == CAST(size_t, -1))
637		goto out;
638
639	scn->sst_tab = CDF_CALLOC(scn->sst_len, ss);
640	if (scn->sst_tab == NULL)
641		return cdf_zero_stream(scn);
642
643	for (j = i = 0; sid >= 0; i++, j++) {
644		if (j >= CDF_LOOP_LIMIT) {
645			DPRINTF(("Read short sector chain loop limit"));
646			goto out;
647		}
648		if (i >= scn->sst_len) {
649			DPRINTF(("Out of bounds reading short sector chain "
650			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n",
651			    i, scn->sst_len));
652			goto out;
653		}
654		if (cdf_read_short_sector(sst, scn->sst_tab, i * ss, ss, h,
655		    sid) != CAST(ssize_t, ss)) {
656			DPRINTF(("Reading short sector chain %d", sid));
657			goto out;
658		}
659		sid = CDF_TOLE4(CAST(uint32_t, ssat->sat_tab[sid]));
660	}
661	return 0;
662out:
663	errno = EFTYPE;
664	return cdf_zero_stream(scn);
665}
666
667int
668cdf_read_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
669    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
670    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
671{
672
673	if (len < h->h_min_size_standard_stream && sst->sst_tab != NULL)
674		return cdf_read_short_sector_chain(h, ssat, sst, sid, len,
675		    scn);
676	else
677		return cdf_read_long_sector_chain(info, h, sat, sid, len, scn);
678}
679
680int
681cdf_read_dir(const cdf_info_t *info, const cdf_header_t *h,
682    const cdf_sat_t *sat, cdf_dir_t *dir)
683{
684	size_t i, j;
685	size_t ss = CDF_SEC_SIZE(h), ns, nd;
686	char *buf;
687	cdf_secid_t sid = h->h_secid_first_directory;
688
689	ns = cdf_count_chain(sat, sid, ss);
690	if (ns == CAST(size_t, -1))
691		return -1;
692
693	nd = ss / CDF_DIRECTORY_SIZE;
694
695	dir->dir_len = ns * nd;
696	dir->dir_tab = CAST(cdf_directory_t *,
697	    CDF_CALLOC(dir->dir_len, sizeof(dir->dir_tab[0])));
698	if (dir->dir_tab == NULL)
699		return -1;
700
701	if ((buf = CAST(char *, CDF_MALLOC(ss))) == NULL) {
702		free(dir->dir_tab);
703		return -1;
704	}
705
706	for (j = i = 0; i < ns; i++, j++) {
707		if (j >= CDF_LOOP_LIMIT) {
708			DPRINTF(("Read dir loop limit"));
709			goto out;
710		}
711		if (cdf_read_sector(info, buf, 0, ss, h, sid) !=
712		    CAST(ssize_t, ss)) {
713			DPRINTF(("Reading directory sector %d", sid));
714			goto out;
715		}
716		for (j = 0; j < nd; j++) {
717			cdf_unpack_dir(&dir->dir_tab[i * nd + j],
718			    &buf[j * CDF_DIRECTORY_SIZE]);
719		}
720		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
721	}
722	if (NEED_SWAP)
723		for (i = 0; i < dir->dir_len; i++)
724			cdf_swap_dir(&dir->dir_tab[i]);
725	free(buf);
726	return 0;
727out:
728	free(dir->dir_tab);
729	free(buf);
730	errno = EFTYPE;
731	return -1;
732}
733
734
735int
736cdf_read_ssat(const cdf_info_t *info, const cdf_header_t *h,
737    const cdf_sat_t *sat, cdf_sat_t *ssat)
738{
739	size_t i, j;
740	size_t ss = CDF_SEC_SIZE(h);
741	cdf_secid_t sid = h->h_secid_first_sector_in_short_sat;
742
743	ssat->sat_tab = NULL;
744	ssat->sat_len = cdf_count_chain(sat, sid, ss);
745	if (ssat->sat_len == CAST(size_t, -1))
746		goto out;
747
748	ssat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(ssat->sat_len, ss));
749	if (ssat->sat_tab == NULL)
750		goto out1;
751
752	for (j = i = 0; sid >= 0; i++, j++) {
753		if (j >= CDF_LOOP_LIMIT) {
754			DPRINTF(("Read short sat sector loop limit"));
755			goto out;
756		}
757		if (i >= ssat->sat_len) {
758			DPRINTF(("Out of bounds reading short sector chain "
759			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
760			    ssat->sat_len));
761			goto out;
762		}
763		if (cdf_read_sector(info, ssat->sat_tab, i * ss, ss, h, sid) !=
764		    CAST(ssize_t, ss)) {
765			DPRINTF(("Reading short sat sector %d", sid));
766			goto out1;
767		}
768		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
769	}
770	return 0;
771out:
772	errno = EFTYPE;
773out1:
774	free(ssat->sat_tab);
775	return -1;
776}
777
778int
779cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h,
780    const cdf_sat_t *sat, const cdf_dir_t *dir, cdf_stream_t *scn,
781    const cdf_directory_t **root)
782{
783	size_t i;
784	const cdf_directory_t *d;
785
786	*root = NULL;
787	for (i = 0; i < dir->dir_len; i++)
788		if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_ROOT_STORAGE)
789			break;
790
791	/* If the it is not there, just fake it; some docs don't have it */
792	if (i == dir->dir_len) {
793		DPRINTF(("Cannot find root storage dir\n"));
794		goto out;
795	}
796	d = &dir->dir_tab[i];
797	*root = d;
798
799	/* If the it is not there, just fake it; some docs don't have it */
800	if (d->d_stream_first_sector < 0) {
801		DPRINTF(("No first secror in dir\n"));
802		goto out;
803	}
804
805	return cdf_read_long_sector_chain(info, h, sat,
806	    d->d_stream_first_sector, d->d_size, scn);
807out:
808	scn->sst_tab = NULL;
809	(void)cdf_zero_stream(scn);
810	return 0;
811}
812
813static int
814cdf_namecmp(const char *d, const uint16_t *s, size_t l)
815{
816	for (; l--; d++, s++)
817		if (*d != CDF_TOLE2(*s))
818			return CAST(unsigned char, *d) - CDF_TOLE2(*s);
819	return 0;
820}
821
822int
823cdf_read_doc_summary_info(const cdf_info_t *info, const cdf_header_t *h,
824    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
825    const cdf_dir_t *dir, cdf_stream_t *scn)
826{
827	return cdf_read_user_stream(info, h, sat, ssat, sst, dir,
828	    "\05DocumentSummaryInformation", scn);
829}
830
831int
832cdf_read_summary_info(const cdf_info_t *info, const cdf_header_t *h,
833    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
834    const cdf_dir_t *dir, cdf_stream_t *scn)
835{
836	return cdf_read_user_stream(info, h, sat, ssat, sst, dir,
837	    "\05SummaryInformation", scn);
838}
839
840int
841cdf_read_user_stream(const cdf_info_t *info, const cdf_header_t *h,
842    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
843    const cdf_dir_t *dir, const char *name, cdf_stream_t *scn)
844{
845	const cdf_directory_t *d;
846	int i = cdf_find_stream(dir, name, CDF_DIR_TYPE_USER_STREAM);
847
848	if (i <= 0) {
849		memset(scn, 0, sizeof(*scn));
850		return -1;
851	}
852
853	d = &dir->dir_tab[i - 1];
854	return cdf_read_sector_chain(info, h, sat, ssat, sst,
855	    d->d_stream_first_sector, d->d_size, scn);
856}
857
858int
859cdf_find_stream(const cdf_dir_t *dir, const char *name, int type)
860{
861	size_t i, name_len = strlen(name) + 1;
862
863	for (i = dir->dir_len; i > 0; i--)
864		if (dir->dir_tab[i - 1].d_type == type &&
865		    cdf_namecmp(name, dir->dir_tab[i - 1].d_name, name_len)
866		    == 0)
867			break;
868	if (i > 0)
869		return CAST(int, i);
870
871	DPRINTF(("Cannot find type %d `%s'\n", type, name));
872	errno = ESRCH;
873	return 0;
874}
875
/* Largest section length (sh_len) accepted by cdf_read_property_info(). */
#define CDF_SHLEN_LIMIT (UINT32_MAX / 64)
/* Largest number of property entries the info array is allowed to hold. */
#define CDF_PROP_LIMIT (UINT32_MAX / (64 * sizeof(cdf_property_info_t)))
878
879static const void *
880cdf_offset(const void *p, size_t l)
881{
882	return CAST(const void *, CAST(const uint8_t *, p) + l);
883}
884
885static const uint8_t *
886cdf_get_property_info_pos(const cdf_stream_t *sst, const cdf_header_t *h,
887    const uint8_t *p, const uint8_t *e, size_t i)
888{
889	size_t tail = (i << 1) + 1;
890	size_t ofs;
891	const uint8_t *q;
892
893	if (p >= e) {
894		DPRINTF(("Past end %p < %p\n", e, p));
895		return NULL;
896	}
897	if (cdf_check_stream_offset(sst, h, p, (tail + 1) * sizeof(uint32_t),
898	    __LINE__) == -1)
899		return NULL;
900	ofs = CDF_GETUINT32(p, tail);
901	q = CAST(const uint8_t *, cdf_offset(CAST(const void *, p),
902	    ofs - 2 * sizeof(uint32_t)));
903
904	if (q < p) {
905		DPRINTF(("Wrapped around %p < %p\n", q, p));
906		return NULL;
907	}
908
909	if (q >= e) {
910		DPRINTF(("Ran off the end %p >= %p\n", q, e));
911		return NULL;
912	}
913	return q;
914}
915
916static cdf_property_info_t *
917cdf_grow_info(cdf_property_info_t **info, size_t *maxcount, size_t incr)
918{
919	cdf_property_info_t *inp;
920	size_t newcount = *maxcount + incr;
921
922	if (newcount > CDF_PROP_LIMIT) {
923		DPRINTF(("exceeded property limit %" SIZE_T_FORMAT "u > %"
924		    SIZE_T_FORMAT "u\n", newcount, CDF_PROP_LIMIT));
925		goto out;
926	}
927	inp = CAST(cdf_property_info_t *,
928	    CDF_REALLOC(*info, newcount * sizeof(*inp)));
929	if (inp == NULL)
930		goto out;
931
932	*info = inp;
933	*maxcount = newcount;
934	return inp;
935out:
936	free(*info);
937	*maxcount = 0;
938	*info = NULL;
939	return NULL;
940}
941
942static int
943cdf_copy_info(cdf_property_info_t *inp, const void *p, const void *e,
944    size_t len)
945{
946	if (inp->pi_type & CDF_VECTOR)
947		return 0;
948
949	if (CAST(size_t, CAST(const char *, e) - CAST(const char *, p)) < len)
950		return 0;
951
952	(void)memcpy(&inp->pi_val, p, len);
953
954	switch (len) {
955	case 2:
956		inp->pi_u16 = CDF_TOLE2(inp->pi_u16);
957		break;
958	case 4:
959		inp->pi_u32 = CDF_TOLE4(inp->pi_u32);
960		break;
961	case 8:
962		inp->pi_u64 = CDF_TOLE8(inp->pi_u64);
963		break;
964	default:
965		abort();
966	}
967	return 1;
968}
969
/*
 * Parse the property section found offs bytes into the stream sst and
 * append one cdf_property_info_t per property to the array *info.
 *
 * *info is grown as needed; *count is the number of entries in use and
 * *maxcount the number allocated.  String values are not copied: s_buf
 * points into sst->sst_tab.
 *
 * Returns 0 on success.  On any error the array is freed, *info is set to
 * NULL, *count and *maxcount to 0, errno to EFTYPE, and -1 is returned.
 */
int
cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h,
    uint32_t offs, cdf_property_info_t **info, size_t *count, size_t *maxcount)
{
	const cdf_section_header_t *shp;
	cdf_section_header_t sh;
	const uint8_t *p, *q, *e;
	size_t i, o4, nelements, j, slen, left;
	cdf_property_info_t *inp;

	if (offs > UINT32_MAX / 4) {
		errno = EFTYPE;
		goto out;
	}
	/* the section header must lie inside the stream */
	shp = CAST(const cdf_section_header_t *,
	    cdf_offset(sst->sst_tab, offs));
	if (cdf_check_stream_offset(sst, h, shp, sizeof(*shp), __LINE__) == -1)
		goto out;
	sh.sh_len = CDF_TOLE4(shp->sh_len);
	if (sh.sh_len > CDF_SHLEN_LIMIT) {
		errno = EFTYPE;
		goto out;
	}

	/* and so must the whole section it announces */
	if (cdf_check_stream_offset(sst, h, shp, sh.sh_len, __LINE__) == -1)
		goto out;

	sh.sh_properties = CDF_TOLE4(shp->sh_properties);
	DPRINTF(("section len: %u properties %u\n", sh.sh_len,
	    sh.sh_properties));
	if (sh.sh_properties > CDF_PROP_LIMIT)
		goto out;
	/* make room for this section's properties after the existing ones */
	inp = cdf_grow_info(info, maxcount, sh.sh_properties);
	if (inp == NULL)
		goto out;
	inp += *count;
	*count += sh.sh_properties;
	/* p: first byte after the section header; e: end of the section */
	p = CAST(const uint8_t *, cdf_offset(sst->sst_tab, offs + sizeof(sh)));
	e = CAST(const uint8_t *, cdf_offset(shp, sh.sh_len));
	if (p >= e || cdf_check_stream_offset(sst, h, e, 0, __LINE__) == -1)
		goto out;

	for (i = 0; i < sh.sh_properties; i++) {
		/* q: start of this property's data (type word first) */
		if ((q = cdf_get_property_info_pos(sst, h, p, e, i)) == NULL)
			goto out;
		inp[i].pi_id = CDF_GETUINT32(p, i << 1);
		/* bytes available from q to the end of the section */
		left = CAST(size_t, e - q);
		if (left < sizeof(uint32_t)) {
			DPRINTF(("short info (no type)_\n"));
			goto out;
		}
		inp[i].pi_type = CDF_GETUINT32(q, 0);
		DPRINTF(("%" SIZE_T_FORMAT "u) id=%#x type=%#x offs=%#tx,%#x\n",
		    i, inp[i].pi_id, inp[i].pi_type, q - p, offs));
		if (inp[i].pi_type & CDF_VECTOR) {
			/* a vector carries an element count after the type */
			if (left < sizeof(uint32_t) * 2) {
				DPRINTF(("missing CDF_VECTOR length\n"));
				goto out;
			}
			nelements = CDF_GETUINT32(q, 1);
			if (nelements > CDF_ELEMENT_LIMIT || nelements == 0) {
				DPRINTF(("CDF_VECTOR with nelements == %"
				    SIZE_T_FORMAT "u\n", nelements));
				goto out;
			}
			slen = 2;
		} else {
			nelements = 1;
			slen = 1;
		}
		/* slen: 32-bit words consumed so far; o4: same, in bytes */
		o4 = slen * sizeof(uint32_t);
		if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED))
			goto unknown;
		switch (inp[i].pi_type & CDF_TYPEMASK) {
		case CDF_NULL:
		case CDF_EMPTY:
			break;
		case CDF_SIGNED16:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int16_t)))
				goto unknown;
			break;
		case CDF_SIGNED32:
		case CDF_BOOL:
		case CDF_UNSIGNED32:
		case CDF_FLOAT:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int32_t)))
				goto unknown;
			break;
		case CDF_SIGNED64:
		case CDF_UNSIGNED64:
		case CDF_DOUBLE:
		case CDF_FILETIME:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int64_t)))
				goto unknown;
			break;
		case CDF_LENGTH32_STRING:
		case CDF_LENGTH32_WSTRING:
			if (nelements > 1) {
				/*
				 * Growing may move the array: remember the
				 * position of inp as an index and re-derive
				 * it afterwards.
				 */
				size_t nelem = inp - *info;
				inp = cdf_grow_info(info, maxcount, nelements);
				if (inp == NULL)
					goto out;
				inp += nelem;
			}
			/* each element is stored in its own inp[] entry */
			for (j = 0; j < nelements && i < sh.sh_properties;
			    j++, i++)
			{
				uint32_t l;

				/* the 32-bit length must be inside the section */
				if (o4 + sizeof(uint32_t) > left)
					goto out;

				l = CDF_GETUINT32(q, slen);
				o4 += sizeof(uint32_t);
				/* ... and so must the string bytes */
				if (o4 + l > left)
					goto out;

				inp[i].pi_str.s_len = l;
				inp[i].pi_str.s_buf = CAST(const char *,
				    CAST(const void *, &q[o4]));

				DPRINTF(("o=%" SIZE_T_FORMAT "u l=%d(%"
				    SIZE_T_FORMAT "u), t=%" SIZE_T_FORMAT
				    "u s=%s\n", o4, l, CDF_ROUND(l, sizeof(l)),
				    left, inp[i].pi_str.s_buf));

				/* round an odd length up before advancing */
				if (l & 1)
					l++;

				slen += l >> 1;
				o4 = slen * sizeof(uint32_t);
			}
			/* undo the last i++; the outer loop increments again */
			i--;
			break;
		case CDF_CLIPBOARD:
			if (inp[i].pi_type & CDF_VECTOR)
				goto unknown;
			break;
		default:
		unknown:
			/* unsupported type: keep the entry, clear its value */
			memset(&inp[i].pi_val, 0, sizeof(inp[i].pi_val));
			DPRINTF(("Don't know how to deal with %#x\n",
			    inp[i].pi_type));
			break;
		}
	}
	return 0;
out:
	free(*info);
	*info = NULL;
	*count = 0;
	*maxcount = 0;
	errno = EFTYPE;
	return -1;
}
1125
1126int
1127cdf_unpack_summary_info(const cdf_stream_t *sst, const cdf_header_t *h,
1128    cdf_summary_info_header_t *ssi, cdf_property_info_t **info, size_t *count)
1129{
1130	size_t maxcount;
1131	const cdf_summary_info_header_t *si =
1132	    CAST(const cdf_summary_info_header_t *, sst->sst_tab);
1133	const cdf_section_declaration_t *sd =
1134	    CAST(const cdf_section_declaration_t *, RCAST(const void *,
1135	    RCAST(const char *, sst->sst_tab)
1136	    + CDF_SECTION_DECLARATION_OFFSET));
1137
1138	if (cdf_check_stream_offset(sst, h, si, sizeof(*si), __LINE__) == -1 ||
1139	    cdf_check_stream_offset(sst, h, sd, sizeof(*sd), __LINE__) == -1)
1140		return -1;
1141	ssi->si_byte_order = CDF_TOLE2(si->si_byte_order);
1142	ssi->si_os_version = CDF_TOLE2(si->si_os_version);
1143	ssi->si_os = CDF_TOLE2(si->si_os);
1144	ssi->si_class = si->si_class;
1145	cdf_swap_class(&ssi->si_class);
1146	ssi->si_count = CDF_TOLE4(si->si_count);
1147	*count = 0;
1148	maxcount = 0;
1149	*info = NULL;
1150	if (cdf_read_property_info(sst, h, CDF_TOLE4(sd->sd_offset), info,
1151	    count, &maxcount) == -1)
1152		return -1;
1153	return 0;
1154}
1155
1156
1157#define extract_catalog_field(t, f, l) \
1158    if (b + l + sizeof(cep->f) > eb) { \
1159	    cep->ce_namlen = 0; \
1160	    break; \
1161    } \
1162    memcpy(&cep->f, b + (l), sizeof(cep->f)); \
1163    ce[i].f = CAST(t, CDF_TOLE(cep->f))
1164
1165int
1166cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst,
1167    cdf_catalog_t **cat)
1168{
1169	size_t ss = cdf_check_stream(sst, h);
1170	const char *b = CAST(const char *, sst->sst_tab);
1171	const char *nb, *eb = b + ss * sst->sst_len;
1172	size_t nr, i, j, k;
1173	cdf_catalog_entry_t *ce;
1174	uint16_t reclen;
1175	const uint16_t *np;
1176
1177	for (nr = 0;; nr++) {
1178		memcpy(&reclen, b, sizeof(reclen));
1179		reclen = CDF_TOLE2(reclen);
1180		if (reclen == 0)
1181			break;
1182		b += reclen;
1183		if (b > eb)
1184		    break;
1185	}
1186	if (nr == 0)
1187		return -1;
1188	nr--;
1189	*cat = CAST(cdf_catalog_t *,
1190	    CDF_MALLOC(sizeof(cdf_catalog_t) + nr * sizeof(*ce)));
1191	if (*cat == NULL)
1192		return -1;
1193	ce = (*cat)->cat_e;
1194	memset(ce, 0, nr * sizeof(*ce));
1195	b = CAST(const char *, sst->sst_tab);
1196	for (j = i = 0; i < nr; b += reclen) {
1197		cdf_catalog_entry_t *cep = &ce[j];
1198		uint16_t rlen;
1199
1200		extract_catalog_field(uint16_t, ce_namlen, 0);
1201		extract_catalog_field(uint16_t, ce_num, 4);
1202		extract_catalog_field(uint64_t, ce_timestamp, 8);
1203		reclen = cep->ce_namlen;
1204
1205		if (reclen < 14) {
1206			cep->ce_namlen = 0;
1207			continue;
1208		}
1209
1210		cep->ce_namlen = __arraycount(cep->ce_name) - 1;
1211		rlen = reclen - 14;
1212		if (cep->ce_namlen > rlen)
1213			cep->ce_namlen = rlen;
1214
1215		np = CAST(const uint16_t *, CAST(const void *, (b + 16)));
1216		nb = CAST(const char *, CAST(const void *,
1217		    (np + cep->ce_namlen)));
1218		if (nb > eb) {
1219			cep->ce_namlen = 0;
1220			break;
1221		}
1222
1223		for (k = 0; k < cep->ce_namlen; k++)
1224			cep->ce_name[k] = np[k]; /* XXX: CDF_TOLE2? */
1225		cep->ce_name[cep->ce_namlen] = 0;
1226		j = i;
1227		i++;
1228	}
1229	(*cat)->cat_num = j;
1230	return 0;
1231}
1232
1233int
1234cdf_print_classid(char *buf, size_t buflen, const cdf_classid_t *id)
1235{
1236	return snprintf(buf, buflen, "%.8x-%.4x-%.4x-%.2x%.2x-"
1237	    "%.2x%.2x%.2x%.2x%.2x%.2x", id->cl_dword, id->cl_word[0],
1238	    id->cl_word[1], id->cl_two[0], id->cl_two[1], id->cl_six[0],
1239	    id->cl_six[1], id->cl_six[2], id->cl_six[3], id->cl_six[4],
1240	    id->cl_six[5]);
1241}
1242
1243static const struct {
1244	uint32_t v;
1245	const char *n;
1246} vn[] = {
1247	{ CDF_PROPERTY_CODE_PAGE, "Code page" },
1248	{ CDF_PROPERTY_TITLE, "Title" },
1249	{ CDF_PROPERTY_SUBJECT, "Subject" },
1250	{ CDF_PROPERTY_AUTHOR, "Author" },
1251	{ CDF_PROPERTY_KEYWORDS, "Keywords" },
1252	{ CDF_PROPERTY_COMMENTS, "Comments" },
1253	{ CDF_PROPERTY_TEMPLATE, "Template" },
1254	{ CDF_PROPERTY_LAST_SAVED_BY, "Last Saved By" },
1255	{ CDF_PROPERTY_REVISION_NUMBER, "Revision Number" },
1256	{ CDF_PROPERTY_TOTAL_EDITING_TIME, "Total Editing Time" },
1257	{ CDF_PROPERTY_LAST_PRINTED, "Last Printed" },
1258	{ CDF_PROPERTY_CREATE_TIME, "Create Time/Date" },
1259	{ CDF_PROPERTY_LAST_SAVED_TIME, "Last Saved Time/Date" },
1260	{ CDF_PROPERTY_NUMBER_OF_PAGES, "Number of Pages" },
1261	{ CDF_PROPERTY_NUMBER_OF_WORDS, "Number of Words" },
1262	{ CDF_PROPERTY_NUMBER_OF_CHARACTERS, "Number of Characters" },
1263	{ CDF_PROPERTY_THUMBNAIL, "Thumbnail" },
1264	{ CDF_PROPERTY_NAME_OF_APPLICATION, "Name of Creating Application" },
1265	{ CDF_PROPERTY_SECURITY, "Security" },
1266	{ CDF_PROPERTY_LOCALE_ID, "Locale ID" },
1267};
1268
1269int
1270cdf_print_property_name(char *buf, size_t bufsiz, uint32_t p)
1271{
1272	size_t i;
1273
1274	for (i = 0; i < __arraycount(vn); i++)
1275		if (vn[i].v == p)
1276			return snprintf(buf, bufsiz, "%s", vn[i].n);
1277	return snprintf(buf, bufsiz, "%#x", p);
1278}
1279
1280int
1281cdf_print_elapsed_time(char *buf, size_t bufsiz, cdf_timestamp_t ts)
1282{
1283	int len = 0;
1284	int days, hours, mins, secs;
1285
1286	ts /= CDF_TIME_PREC;
1287	secs = CAST(int, ts % 60);
1288	ts /= 60;
1289	mins = CAST(int, ts % 60);
1290	ts /= 60;
1291	hours = CAST(int, ts % 24);
1292	ts /= 24;
1293	days = CAST(int, ts);
1294
1295	if (days) {
1296		len += snprintf(buf + len, bufsiz - len, "%dd+", days);
1297		if (CAST(size_t, len) >= bufsiz)
1298			return len;
1299	}
1300
1301	if (days || hours) {
1302		len += snprintf(buf + len, bufsiz - len, "%.2d:", hours);
1303		if (CAST(size_t, len) >= bufsiz)
1304			return len;
1305	}
1306
1307	len += snprintf(buf + len, bufsiz - len, "%.2d:", mins);
1308	if (CAST(size_t, len) >= bufsiz)
1309		return len;
1310
1311	len += snprintf(buf + len, bufsiz - len, "%.2d", secs);
1312	return len;
1313}
1314
1315char *
1316cdf_u16tos8(char *buf, size_t len, const uint16_t *p)
1317{
1318	size_t i;
1319	for (i = 0; i < len && p[i]; i++)
1320		buf[i] = CAST(char, p[i]);
1321	buf[i] = '\0';
1322	return buf;
1323}
1324
1325#ifdef CDF_DEBUG
1326void
1327cdf_dump_header(const cdf_header_t *h)
1328{
1329	size_t i;
1330
1331#define DUMP(a, b) (void)fprintf(stderr, "%40.40s = " a "\n", # b, h->h_ ## b)
1332#define DUMP2(a, b) (void)fprintf(stderr, "%40.40s = " a " (" a ")\n", # b, \
1333    h->h_ ## b, 1 << h->h_ ## b)
1334	DUMP("%d", revision);
1335	DUMP("%d", version);
1336	DUMP("%#x", byte_order);
1337	DUMP2("%d", sec_size_p2);
1338	DUMP2("%d", short_sec_size_p2);
1339	DUMP("%d", num_sectors_in_sat);
1340	DUMP("%d", secid_first_directory);
1341	DUMP("%d", min_size_standard_stream);
1342	DUMP("%d", secid_first_sector_in_short_sat);
1343	DUMP("%d", num_sectors_in_short_sat);
1344	DUMP("%d", secid_first_sector_in_master_sat);
1345	DUMP("%d", num_sectors_in_master_sat);
1346	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
1347		if (h->h_master_sat[i] == CDF_SECID_FREE)
1348			break;
1349		(void)fprintf(stderr, "%35.35s[%.3" SIZE_T_FORMAT "u] = %d\n",
1350		    "master_sat", i, h->h_master_sat[i]);
1351	}
1352}
1353
1354void
1355cdf_dump_sat(const char *prefix, const cdf_sat_t *sat, size_t size)
1356{
1357	size_t i, j, s = size / sizeof(cdf_secid_t);
1358
1359	for (i = 0; i < sat->sat_len; i++) {
1360		(void)fprintf(stderr, "%s[%" SIZE_T_FORMAT "u]:\n%.6"
1361		    SIZE_T_FORMAT "u: ", prefix, i, i * s);
1362		for (j = 0; j < s; j++) {
1363			(void)fprintf(stderr, "%5d, ",
1364			    CDF_TOLE4(sat->sat_tab[s * i + j]));
1365			if ((j + 1) % 10 == 0)
1366				(void)fprintf(stderr, "\n%.6" SIZE_T_FORMAT
1367				    "u: ", i * s + j + 1);
1368		}
1369		(void)fprintf(stderr, "\n");
1370	}
1371}
1372
1373void
1374cdf_dump(const void *v, size_t len)
1375{
1376	size_t i, j;
1377	const unsigned char *p = v;
1378	char abuf[16];
1379
1380	(void)fprintf(stderr, "%.4x: ", 0);
1381	for (i = 0, j = 0; i < len; i++, p++) {
1382		(void)fprintf(stderr, "%.2x ", *p);
1383		abuf[j++] = isprint(*p) ? *p : '.';
1384		if (j == 16) {
1385			j = 0;
1386			abuf[15] = '\0';
1387			(void)fprintf(stderr, "%s\n%.4" SIZE_T_FORMAT "x: ",
1388			    abuf, i + 1);
1389		}
1390	}
1391	(void)fprintf(stderr, "\n");
1392}
1393
1394void
1395cdf_dump_stream(const cdf_stream_t *sst)
1396{
1397	size_t ss = sst->sst_ss;
1398	cdf_dump(sst->sst_tab, ss * sst->sst_len);
1399}
1400
1401void
1402cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h,
1403    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
1404    const cdf_dir_t *dir)
1405{
1406	size_t i, j;
1407	cdf_directory_t *d;
1408	char name[__arraycount(d->d_name)];
1409	cdf_stream_t scn;
1410	struct timespec ts;
1411
1412	static const char *types[] = { "empty", "user storage",
1413	    "user stream", "lockbytes", "property", "root storage" };
1414
1415	for (i = 0; i < dir->dir_len; i++) {
1416		char buf[26];
1417		d = &dir->dir_tab[i];
1418		for (j = 0; j < sizeof(name); j++)
1419			name[j] = (char)CDF_TOLE2(d->d_name[j]);
1420		(void)fprintf(stderr, "Directory %" SIZE_T_FORMAT "u: %s\n",
1421		    i, name);
1422		if (d->d_type < __arraycount(types))
1423			(void)fprintf(stderr, "Type: %s\n", types[d->d_type]);
1424		else
1425			(void)fprintf(stderr, "Type: %d\n", d->d_type);
1426		(void)fprintf(stderr, "Color: %s\n",
1427		    d->d_color ? "black" : "red");
1428		(void)fprintf(stderr, "Left child: %d\n", d->d_left_child);
1429		(void)fprintf(stderr, "Right child: %d\n", d->d_right_child);
1430		(void)fprintf(stderr, "Flags: %#x\n", d->d_flags);
1431		cdf_timestamp_to_timespec(&ts, d->d_created);
1432		(void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec, buf));
1433		cdf_timestamp_to_timespec(&ts, d->d_modified);
1434		(void)fprintf(stderr, "Modified %s",
1435		    cdf_ctime(&ts.tv_sec, buf));
1436		(void)fprintf(stderr, "Stream %d\n", d->d_stream_first_sector);
1437		(void)fprintf(stderr, "Size %d\n", d->d_size);
1438		switch (d->d_type) {
1439		case CDF_DIR_TYPE_USER_STORAGE:
1440			(void)fprintf(stderr, "Storage: %d\n", d->d_storage);
1441			break;
1442		case CDF_DIR_TYPE_USER_STREAM:
1443			if (sst == NULL)
1444				break;
1445			if (cdf_read_sector_chain(info, h, sat, ssat, sst,
1446			    d->d_stream_first_sector, d->d_size, &scn) == -1) {
1447				warn("Can't read stream for %s at %d len %d",
1448				    name, d->d_stream_first_sector, d->d_size);
1449				break;
1450			}
1451			cdf_dump_stream(&scn);
1452			free(scn.sst_tab);
1453			break;
1454		default:
1455			break;
1456		}
1457
1458	}
1459}
1460
1461void
1462cdf_dump_property_info(const cdf_property_info_t *info, size_t count)
1463{
1464	cdf_timestamp_t tp;
1465	struct timespec ts;
1466	char buf[64];
1467	size_t i, j;
1468
1469	for (i = 0; i < count; i++) {
1470		cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
1471		(void)fprintf(stderr, "%" SIZE_T_FORMAT "u) %s: ", i, buf);
1472		switch (info[i].pi_type) {
1473		case CDF_NULL:
1474			break;
1475		case CDF_SIGNED16:
1476			(void)fprintf(stderr, "signed 16 [%hd]\n",
1477			    info[i].pi_s16);
1478			break;
1479		case CDF_SIGNED32:
1480			(void)fprintf(stderr, "signed 32 [%d]\n",
1481			    info[i].pi_s32);
1482			break;
1483		case CDF_UNSIGNED32:
1484			(void)fprintf(stderr, "unsigned 32 [%u]\n",
1485			    info[i].pi_u32);
1486			break;
1487		case CDF_FLOAT:
1488			(void)fprintf(stderr, "float [%g]\n",
1489			    info[i].pi_f);
1490			break;
1491		case CDF_DOUBLE:
1492			(void)fprintf(stderr, "double [%g]\n",
1493			    info[i].pi_d);
1494			break;
1495		case CDF_LENGTH32_STRING:
1496			(void)fprintf(stderr, "string %u [%.*s]\n",
1497			    info[i].pi_str.s_len,
1498			    info[i].pi_str.s_len, info[i].pi_str.s_buf);
1499			break;
1500		case CDF_LENGTH32_WSTRING:
1501			(void)fprintf(stderr, "string %u [",
1502			    info[i].pi_str.s_len);
1503			for (j = 0; j < info[i].pi_str.s_len - 1; j++)
1504			    (void)fputc(info[i].pi_str.s_buf[j << 1], stderr);
1505			(void)fprintf(stderr, "]\n");
1506			break;
1507		case CDF_FILETIME:
1508			tp = info[i].pi_tp;
1509			if (tp < 1000000000000000LL) {
1510				cdf_print_elapsed_time(buf, sizeof(buf), tp);
1511				(void)fprintf(stderr, "timestamp %s\n", buf);
1512			} else {
1513				char tbuf[26];
1514				cdf_timestamp_to_timespec(&ts, tp);
1515				(void)fprintf(stderr, "timestamp %s",
1516				    cdf_ctime(&ts.tv_sec, tbuf));
1517			}
1518			break;
1519		case CDF_CLIPBOARD:
1520			(void)fprintf(stderr, "CLIPBOARD %u\n", info[i].pi_u32);
1521			break;
1522		default:
1523			DPRINTF(("Don't know how to deal with %#x\n",
1524			    info[i].pi_type));
1525			break;
1526		}
1527	}
1528}
1529
1530
1531void
1532cdf_dump_summary_info(const cdf_header_t *h, const cdf_stream_t *sst)
1533{
1534	char buf[128];
1535	cdf_summary_info_header_t ssi;
1536	cdf_property_info_t *info;
1537	size_t count;
1538
1539	(void)&h;
1540	if (cdf_unpack_summary_info(sst, h, &ssi, &info, &count) == -1)
1541		return;
1542	(void)fprintf(stderr, "Endian: %#x\n", ssi.si_byte_order);
1543	(void)fprintf(stderr, "Os Version %d.%d\n", ssi.si_os_version & 0xff,
1544	    ssi.si_os_version >> 8);
1545	(void)fprintf(stderr, "Os %d\n", ssi.si_os);
1546	cdf_print_classid(buf, sizeof(buf), &ssi.si_class);
1547	(void)fprintf(stderr, "Class %s\n", buf);
1548	(void)fprintf(stderr, "Count %d\n", ssi.si_count);
1549	cdf_dump_property_info(info, count);
1550	free(info);
1551}
1552
1553
1554void
1555cdf_dump_catalog(const cdf_header_t *h, const cdf_stream_t *sst)
1556{
1557	cdf_catalog_t *cat;
1558	cdf_unpack_catalog(h, sst, &cat);
1559	const cdf_catalog_entry_t *ce = cat->cat_e;
1560	struct timespec ts;
1561	char tbuf[64], sbuf[256];
1562	size_t i;
1563
1564	printf("Catalog:\n");
1565	for (i = 0; i < cat->cat_num; i++) {
1566		cdf_timestamp_to_timespec(&ts, ce[i].ce_timestamp);
1567		printf("\t%d %s %s", ce[i].ce_num,
1568		    cdf_u16tos8(sbuf, ce[i].ce_namlen, ce[i].ce_name),
1569		    cdf_ctime(&ts.tv_sec, tbuf));
1570	}
1571	free(cat);
1572}
1573
1574#endif
1575
1576#ifdef TEST
1577int
1578main(int argc, char *argv[])
1579{
1580	int i;
1581	cdf_header_t h;
1582	cdf_sat_t sat, ssat;
1583	cdf_stream_t sst, scn;
1584	cdf_dir_t dir;
1585	cdf_info_t info;
1586	const cdf_directory_t *root;
1587#ifdef __linux__
1588#define getprogname() __progname
1589	extern char *__progname;
1590#endif
1591	if (argc < 2) {
1592		(void)fprintf(stderr, "Usage: %s <filename>\n", getprogname());
1593		return -1;
1594	}
1595
1596	info.i_buf = NULL;
1597	info.i_len = 0;
1598	for (i = 1; i < argc; i++) {
1599		if ((info.i_fd = open(argv[1], O_RDONLY)) == -1)
1600			err(EXIT_FAILURE, "Cannot open `%s'", argv[1]);
1601
1602		if (cdf_read_header(&info, &h) == -1)
1603			err(EXIT_FAILURE, "Cannot read header");
1604#ifdef CDF_DEBUG
1605		cdf_dump_header(&h);
1606#endif
1607
1608		if (cdf_read_sat(&info, &h, &sat) == -1)
1609			err(EXIT_FAILURE, "Cannot read sat");
1610#ifdef CDF_DEBUG
1611		cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
1612#endif
1613
1614		if (cdf_read_ssat(&info, &h, &sat, &ssat) == -1)
1615			err(EXIT_FAILURE, "Cannot read ssat");
1616#ifdef CDF_DEBUG
1617		cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
1618#endif
1619
1620		if (cdf_read_dir(&info, &h, &sat, &dir) == -1)
1621			err(EXIT_FAILURE, "Cannot read dir");
1622
1623		if (cdf_read_short_stream(&info, &h, &sat, &dir, &sst, &root)
1624		    == -1)
1625			err(EXIT_FAILURE, "Cannot read short stream");
1626#ifdef CDF_DEBUG
1627		cdf_dump_stream(&sst);
1628#endif
1629
1630#ifdef CDF_DEBUG
1631		cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
1632#endif
1633
1634
1635		if (cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
1636		    &scn) == -1)
1637			warn("Cannot read summary info");
1638#ifdef CDF_DEBUG
1639		else
1640			cdf_dump_summary_info(&h, &scn);
1641#endif
1642		if (cdf_read_user_stream(&info, &h, &sat, &ssat, &sst,
1643		    &dir, "Catalog", &scn) == -1)
1644			warn("Cannot read catalog");
1645#ifdef CDF_DEBUG
1646		else
1647			cdf_dump_catalog(&h, &scn);
1648#endif
1649
1650		(void)close(info.i_fd);
1651	}
1652
1653	return 0;
1654}
1655#endif
1656